1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/MC/MCSchedule.h"
98#include "llvm/Support/Debug.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <optional>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118using namespace llvm::PatternMatch;
119
120#define DEBUG_TYPE "arm-isel"
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125STATISTIC(NumConstpoolPromoted,
126 "Number of constants with their storage promoted into constant pools");
127
128static cl::opt<bool>
129ARMInterworking("arm-interworking", cl::Hidden,
130 cl::desc("Enable / disable ARM interworking (for debugging only)"),
131 cl::init(true));
132
134 "arm-promote-constant", cl::Hidden,
135 cl::desc("Enable / disable promotion of unnamed_addr constants into "
136 "constant pools"),
137 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139 "arm-promote-constant-max-size", cl::Hidden,
140 cl::desc("Maximum size of constant to promote into a constant pool"),
141 cl::init(64));
143 "arm-promote-constant-max-total", cl::Hidden,
144 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145 cl::init(128));
146
148MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150 cl::init(2));
151
152/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153constexpr MVT FlagsVT = MVT::i32;
154
155// The APCS parameter registers.
156static const MCPhysReg GPRArgRegs[] = {
157 ARM::R0, ARM::R1, ARM::R2, ARM::R3
158};
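// Editorial sketch (not part of the original file): with the four APCS core
// argument registers above, a hypothetical helper could report how many i32
// arguments still fit in registers; the real bookkeeping is done by the
// generated calling-convention code, so this is illustration only.
[[maybe_unused]] static unsigned sketchRemainingGPRArgRegs(unsigned AlreadyUsed) {
  unsigned NumArgRegs = std::size(GPRArgRegs); // R0-R3, i.e. 4 registers
  return AlreadyUsed >= NumArgRegs ? 0 : NumArgRegs - AlreadyUsed;
}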
159
161 SelectionDAG &DAG, const SDLoc &DL) {
163 assert(Arg.ArgVT.bitsLT(MVT::i32));
164 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
165 SDValue Ext =
167 MVT::i32, Trunc);
168 return Ext;
169}
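// Editorial sketch (assumption-labelled, not in the original source): the
// helper above re-extends a narrow, untrusted value returned from or passed to
// a CMSE call by truncating to the argument type and extending back to i32. A
// minimal stand-alone variant of that pattern, with the extension kind passed
// explicitly instead of taken from the argument flags, might look like:
[[maybe_unused]] static SDValue sketchReextendToI32(SelectionDAG &DAG,
                                                    const SDLoc &DL,
                                                    SDValue Value, MVT NarrowVT,
                                                    bool IsSigned) {
  assert(NarrowVT.bitsLT(MVT::i32) && "only narrow values need re-extension");
  // Drop any bits the other side may have left in the high part...
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, Value);
  // ...then re-extend locally so the rest of the DAG sees a well-formed i32.
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  return DAG.getNode(ExtOpc, DL, MVT::i32, Trunc);
}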
170
171void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
172 if (VT != PromotedLdStVT) {
174 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
175
177 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
178 }
179
180 MVT ElemTy = VT.getVectorElementType();
181 if (ElemTy != MVT::f64)
185 if (ElemTy == MVT::i32) {
190 } else {
195 }
204 if (VT.isInteger()) {
208 }
209
210 // Neon does not support vector divide/remainder operations.
219
220 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
221 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
223 setOperationAction(Opcode, VT, Legal);
224 if (!VT.isFloatingPoint())
225 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
226 setOperationAction(Opcode, VT, Legal);
227}
228
229void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
230 addRegisterClass(VT, &ARM::DPRRegClass);
231 addTypeForNEON(VT, MVT::f64);
232}
233
234void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
235 addRegisterClass(VT, &ARM::DPairRegClass);
236 addTypeForNEON(VT, MVT::v2f64);
237}
238
239void ARMTargetLowering::setAllExpand(MVT VT) {
240 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
241 setOperationAction(Opc, VT, Expand);
242
243 // We support these really simple operations even on types where all
244 // the actual arithmetic has to be broken down into simpler
245 // operations or turned into library calls.
250}
251
252void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
253 LegalizeAction Action) {
254 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
256 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
257}
258
259void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
260 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
261
262 for (auto VT : IntTypes) {
263 addRegisterClass(VT, &ARM::MQPRRegClass);
293
294 // No native support for these.
304
305 // Vector reductions
315
316 if (!HasMVEFP) {
321 } else {
324 }
325
326 // Pre and Post inc are supported on loads and stores
327 for (unsigned im = (unsigned)ISD::PRE_INC;
333 }
334 }
335
336 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
337 for (auto VT : FloatTypes) {
338 addRegisterClass(VT, &ARM::MQPRRegClass);
339 if (!HasMVEFP)
340 setAllExpand(VT);
341
342 // These are legal or custom whether we have MVE.fp or not
355
356 // Pre and Post inc are supported on loads and stores
357 for (unsigned im = (unsigned)ISD::PRE_INC;
363 }
364
365 if (HasMVEFP) {
373
374 // No native support for these.
389 }
390 }
391
392 // Custom expand smaller-than-legal vector reductions to prevent false zero
393 // items from being added.
402
403 // We 'support' these types up to bitcast/load/store level, regardless of
404 // MVE integer-only / float support. Only FP data processing on the FP
405 // vector types is inhibited at the integer-only level.
406 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
407 for (auto VT : LongTypes) {
408 addRegisterClass(VT, &ARM::MQPRRegClass);
409 setAllExpand(VT);
415 }
417
418 // We can do bitwise operations on v2i64 vectors
419 setOperationAction(ISD::AND, MVT::v2i64, Legal);
420 setOperationAction(ISD::OR, MVT::v2i64, Legal);
421 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
422
423 // It is legal to extload from v4i8 to v4i16 or v4i32.
424 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
426 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
427
428 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
434
435 // Some truncating stores are legal too.
436 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
437 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
438 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
439
440 // Pre and Post inc on these are legal, given the correct extends
441 for (unsigned im = (unsigned)ISD::PRE_INC;
443 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
448 }
449 }
450
451 // Predicate types
452 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
453 for (auto VT : pTypes) {
454 addRegisterClass(VT, &ARM::VCCRRegClass);
469
470 if (!HasMVEFP) {
475 }
476 }
480 setOperationAction(ISD::OR, MVT::v2i1, Expand);
486
495}
496
498 const ARMSubtarget &STI)
499 : TargetLowering(TM), Subtarget(&STI) {
500 RegInfo = Subtarget->getRegisterInfo();
501 Itins = Subtarget->getInstrItineraryData();
502
505
506 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
507 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
508 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
509 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
510 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
511 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
513 }
514
515 if (Subtarget->isTargetMachO()) {
516 // Uses VFP for Thumb libfuncs if available.
517 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
518 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
519 static const struct {
520 const RTLIB::Libcall Op;
521 const char * const Name;
522 const ISD::CondCode Cond;
523 } LibraryCalls[] = {
524 // Single-precision floating-point arithmetic.
525 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
528 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
529
530 // Double-precision floating-point arithmetic.
531 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
534 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
535
536 // Single-precision comparisons.
537 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
538 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
539 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
540 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
541 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
542 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
543 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
544
545 // Double-precision comparisons.
546 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
547 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
548 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
549 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
550 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
551 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
552 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
553
554 // Floating-point to integer conversions.
555 // i64 conversions are done via library routines even when generating VFP
556 // instructions, so use the same ones.
557 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
560 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
561
562 // Conversions between floating types.
563 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
564 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
565
566 // Integer to floating-point conversions.
567 // i64 conversions are done via library routines even when generating VFP
568 // instructions, so use the same ones.
569 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
570 // e.g., __floatunsidf vs. __floatunssidfvfp.
571 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
573 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
574 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
575 };
576
577 for (const auto &LC : LibraryCalls) {
578 setLibcallName(LC.Op, LC.Name);
579 if (LC.Cond != ISD::SETCC_INVALID)
580 setCmpLibcallCC(LC.Op, LC.Cond);
581 }
582 }
583 }
584
585 // RTLIB
586 if (Subtarget->isAAPCS_ABI() &&
587 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
588 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
589 static const struct {
590 const RTLIB::Libcall Op;
591 const char * const Name;
592 const CallingConv::ID CC;
593 const ISD::CondCode Cond;
594 } LibraryCalls[] = {
595 // Double-precision floating-point arithmetic helper functions
596 // RTABI chapter 4.1.2, Table 2
597 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601
602 // Double-precision floating-point comparison helper functions
603 // RTABI chapter 4.1.2, Table 3
604 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
606 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
610 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
611
612 // Single-precision floating-point arithmetic helper functions
613 // RTABI chapter 4.1.2, Table 4
614 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618
619 // Single-precision floating-point comparison helper functions
620 // RTABI chapter 4.1.2, Table 5
621 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
623 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
627 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
628
629 // Floating-point to integer conversions.
630 // RTABI chapter 4.1.2, Table 6
631 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639
640 // Conversions between floating types.
641 // RTABI chapter 4.1.2, Table 7
642 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645
646 // Integer to floating-point conversions.
647 // RTABI chapter 4.1.2, Table 8
648 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656
657 // Long long helper functions
658 // RTABI chapter 4.2, Table 9
659 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663
664 // Integer division functions
665 // RTABI chapter 4.3.1
666 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
674 };
675
676 for (const auto &LC : LibraryCalls) {
677 setLibcallName(LC.Op, LC.Name);
678 setLibcallCallingConv(LC.Op, LC.CC);
679 if (LC.Cond != ISD::SETCC_INVALID)
680 setCmpLibcallCC(LC.Op, LC.Cond);
681 }
682
683 // EABI dependent RTLIB
684 if (TM.Options.EABIVersion == EABI::EABI4 ||
685 TM.Options.EABIVersion == EABI::EABI5) {
686 static const struct {
687 const RTLIB::Libcall Op;
688 const char *const Name;
689 const CallingConv::ID CC;
690 const ISD::CondCode Cond;
691 } MemOpsLibraryCalls[] = {
692 // Memory operations
693 // RTABI chapter 4.3.4
694 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
697 };
698
699 for (const auto &LC : MemOpsLibraryCalls) {
700 setLibcallName(LC.Op, LC.Name);
701 setLibcallCallingConv(LC.Op, LC.CC);
702 if (LC.Cond != ISD::SETCC_INVALID)
703 setCmpLibcallCC(LC.Op, LC.Cond);
704 }
705 }
706 }
707
708 if (Subtarget->isTargetWindows()) {
709 static const struct {
710 const RTLIB::Libcall Op;
711 const char * const Name;
712 const CallingConv::ID CC;
713 } LibraryCalls[] = {
714 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
721 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
722 };
723
724 for (const auto &LC : LibraryCalls) {
725 setLibcallName(LC.Op, LC.Name);
726 setLibcallCallingConv(LC.Op, LC.CC);
727 }
728 }
729
730 // Use divmod compiler-rt calls for iOS 5.0 and later.
731 if (Subtarget->isTargetMachO() &&
732 !(Subtarget->isTargetIOS() &&
733 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
734 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
735 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
736 }
737
738 // The half <-> float conversion functions are always soft-float on
739 // non-watchOS platforms, but are needed for some targets which use a
740 // hard-float calling convention by default.
741 if (!Subtarget->isTargetWatchABI()) {
742 if (Subtarget->isAAPCS_ABI()) {
743 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
745 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
746 } else {
747 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
749 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
750 }
751 }
752
753 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
754 // a __gnu_ prefix (which is the default).
755 if (Subtarget->isTargetAEABI()) {
756 static const struct {
757 const RTLIB::Libcall Op;
758 const char * const Name;
759 const CallingConv::ID CC;
760 } LibraryCalls[] = {
761 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
763 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
764 };
765
766 for (const auto &LC : LibraryCalls) {
767 setLibcallName(LC.Op, LC.Name);
768 setLibcallCallingConv(LC.Op, LC.CC);
769 }
770 }
771
772 if (Subtarget->isThumb1Only())
773 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
774 else
775 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
776
777 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
778 Subtarget->hasFPRegs()) {
779 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
780 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
781
786
787 if (!Subtarget->hasVFP2Base())
788 setAllExpand(MVT::f32);
789 if (!Subtarget->hasFP64())
790 setAllExpand(MVT::f64);
791 }
792
793 if (Subtarget->hasFullFP16()) {
794 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
797
800 }
801
802 if (Subtarget->hasBF16()) {
803 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
804 setAllExpand(MVT::bf16);
805 if (!Subtarget->hasFullFP16())
807 } else {
810 }
811
813 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
814 setTruncStoreAction(VT, InnerVT, Expand);
815 addAllExtLoads(VT, InnerVT, Expand);
816 }
817
820
822 }
823
826
829
830 if (Subtarget->hasMVEIntegerOps())
831 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
832
833 // Combine low-overhead loop intrinsics so that we can lower i1 types.
834 if (Subtarget->hasLOB()) {
836 }
837
838 if (Subtarget->hasNEON()) {
839 addDRTypeForNEON(MVT::v2f32);
840 addDRTypeForNEON(MVT::v8i8);
841 addDRTypeForNEON(MVT::v4i16);
842 addDRTypeForNEON(MVT::v2i32);
843 addDRTypeForNEON(MVT::v1i64);
844
845 addQRTypeForNEON(MVT::v4f32);
846 addQRTypeForNEON(MVT::v2f64);
847 addQRTypeForNEON(MVT::v16i8);
848 addQRTypeForNEON(MVT::v8i16);
849 addQRTypeForNEON(MVT::v4i32);
850 addQRTypeForNEON(MVT::v2i64);
851
852 if (Subtarget->hasFullFP16()) {
853 addQRTypeForNEON(MVT::v8f16);
854 addDRTypeForNEON(MVT::v4f16);
855 }
856
857 if (Subtarget->hasBF16()) {
858 addQRTypeForNEON(MVT::v8bf16);
859 addDRTypeForNEON(MVT::v4bf16);
860 }
861 }
862
863 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
864 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
865 // none of Neon, MVE or VFP supports any arithmetic operations on it.
866 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
867 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
868 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
869 // FIXME: Code duplication: FDIV and FREM are expanded always, see
870 // ARMTargetLowering::addTypeForNEON method for details.
871 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
872 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
873 // FIXME: Create unittest.
874 // In other words, find a case where "copysign" appears in the DAG with
875 // vector operands.
877 // FIXME: Code duplication: SETCC has custom operation action, see
878 // ARMTargetLowering::addTypeForNEON method for details.
880 // FIXME: Create unittest for FNEG and for FABS.
881 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
882 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
884 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
885 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
886 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
887 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
888 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
891 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
894 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
900 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
901 }
902
903 if (Subtarget->hasNEON()) {
904 // The same applies to v4f32. But keep in mind that vadd, vsub and vmul are
905 // natively supported for v4f32.
907 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
908 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
909 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
910 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
911 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
914 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
922
923 // Mark v2f32 intrinsics.
925 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
926 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
927 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
928 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
929 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
932 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
940
941 // Neon does not support some operations on v1i64 and v2i64 types.
942 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
943 // Custom handling for some quad-vector types to detect VMULL.
944 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
945 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
946 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
947 // Custom handling for some vector types to avoid expensive expansions
948 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
950 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
952 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
953 // a destination type that is wider than the source, nor does
954 // it have an FP_TO_[SU]INT instruction with a narrower destination than
955 // its source.
964
967
968 // NEON does not have a single-instruction CTPOP for vectors with element
969 // types wider than 8 bits. However, custom lowering can leverage the
970 // v8i8/v16i8 vcnt instruction.
977
978 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
979 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
980
981 // NEON does not have single instruction CTTZ for vectors.
983 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
986
987 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
988 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
989 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
990 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
991
996
1001
1005 }
1006
1007 // NEON only has FMA instructions as of VFP4.
1008 if (!Subtarget->hasVFP4Base()) {
1009 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1010 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1011 }
1012
1015
1016 // It is legal to extload from v4i8 to v4i16 or v4i32.
1017 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1018 MVT::v2i32}) {
1023 }
1024 }
1025
1026 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1027 MVT::v4i32}) {
1032 }
1033 }
1034
1035 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1042 }
1043 if (Subtarget->hasMVEIntegerOps()) {
1046 ISD::SETCC});
1047 }
1048 if (Subtarget->hasMVEFloatOps()) {
1050 }
1051
1052 if (!Subtarget->hasFP64()) {
1053 // When targeting a floating-point unit with only single-precision
1054 // operations, f64 is legal for the few double-precision instructions which
1055 // are present. However, no double-precision operations other than moves,
1056 // loads and stores are provided by the hardware.
1094 }
1095
1096 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1099 if (Subtarget->hasFullFP16()) {
1102 }
1103 }
1104
1105 if (!Subtarget->hasFP16()) {
1108 }
1109
1111
1112 // ARM does not have floating-point extending loads.
1113 for (MVT VT : MVT::fp_valuetypes()) {
1114 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1115 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1116 }
1117
1118 // ... or truncating stores
1119 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1120 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1121 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1122
1123 // ARM does not have i1 sign extending load.
1124 for (MVT VT : MVT::integer_valuetypes())
1125 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1126
1127 // ARM supports all 4 flavors of integer indexed load / store.
1128 if (!Subtarget->isThumb1Only()) {
1129 for (unsigned im = (unsigned)ISD::PRE_INC;
1131 setIndexedLoadAction(im, MVT::i1, Legal);
1132 setIndexedLoadAction(im, MVT::i8, Legal);
1133 setIndexedLoadAction(im, MVT::i16, Legal);
1134 setIndexedLoadAction(im, MVT::i32, Legal);
1135 setIndexedStoreAction(im, MVT::i1, Legal);
1136 setIndexedStoreAction(im, MVT::i8, Legal);
1137 setIndexedStoreAction(im, MVT::i16, Legal);
1138 setIndexedStoreAction(im, MVT::i32, Legal);
1139 }
1140 } else {
1141 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1144 }
1145
1150
1153 if (Subtarget->hasDSP()) {
1162 }
1163 if (Subtarget->hasBaseDSP()) {
1166 }
1167
1168 // i64 operation support.
1171 if (Subtarget->isThumb1Only()) {
1174 }
1175 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1176 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1178
1188
1189 // MVE lowers 64-bit shifts to lsll and lsrl,
1190 // assuming that ISD::SRL and SRA of i64 are already marked custom.
1191 if (Subtarget->hasMVEIntegerOps())
1193
1194 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1195 if (Subtarget->isThumb1Only()) {
1199 }
1200
1201 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1203
1204 // ARM does not have ROTL.
1209 }
1212 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1215 }
1216
1217 // @llvm.readcyclecounter requires the Performance Monitors extension.
1218 // Default to the 0 expansion on unsupported platforms.
1219 // FIXME: Technically there are older ARM CPUs that have
1220 // implementation-specific ways of obtaining this information.
1221 if (Subtarget->hasPerfMon())
1223
1224 // Only ARMv6 has BSWAP.
1225 if (!Subtarget->hasV6Ops())
1227
1228 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1229 : Subtarget->hasDivideInARMMode();
1230 if (!hasDivide) {
1231 // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1234 }
1235
1236 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1239
1242 }
1243
1246
1247 // Register based DivRem for AEABI (RTABI 4.2)
1248 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1249 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1250 Subtarget->isTargetWindows()) {
1253 HasStandaloneRem = false;
1254
1255 if (Subtarget->isTargetWindows()) {
1256 const struct {
1257 const RTLIB::Libcall Op;
1258 const char * const Name;
1259 const CallingConv::ID CC;
1260 } LibraryCalls[] = {
1261 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1262 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1263 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1264 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1265
1266 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1267 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1268 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1269 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1270 };
1271
1272 for (const auto &LC : LibraryCalls) {
1273 setLibcallName(LC.Op, LC.Name);
1274 setLibcallCallingConv(LC.Op, LC.CC);
1275 }
1276 } else {
1277 const struct {
1278 const RTLIB::Libcall Op;
1279 const char * const Name;
1280 const CallingConv::ID CC;
1281 } LibraryCalls[] = {
1282 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1283 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1284 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1285 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1286
1287 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1288 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1289 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1290 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1291 };
1292
1293 for (const auto &LC : LibraryCalls) {
1294 setLibcallName(LC.Op, LC.Name);
1295 setLibcallCallingConv(LC.Op, LC.CC);
1296 }
1297 }
1298
1303 } else {
1306 }
1307
1312
1313 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1315
1316 // Use the default implementation.
1318 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1320 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1323
1324 if (Subtarget->isTargetWindows())
1326 else
1328
1329 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1330 // the default expansion.
1331 InsertFencesForAtomic = false;
1332 if (Subtarget->hasAnyDataBarrier() &&
1333 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1334 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1335 // to ldrex/strex loops already.
1337 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1339
1340 // On v8, we have particularly efficient implementations of atomic fences
1341 // if they can be combined with nearby atomic loads and stores.
1342 if (!Subtarget->hasAcquireRelease() ||
1343 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1344 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1345 InsertFencesForAtomic = true;
1346 }
1347 } else {
1348 // If there's anything we can use as a barrier, go through custom lowering
1349 // for ATOMIC_FENCE.
1350 // If target has DMB in thumb, Fences can be inserted.
1351 if (Subtarget->hasDataBarrier())
1352 InsertFencesForAtomic = true;
1353
1355 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1356
1357 // Set them all for libcall, which will force libcalls.
1370 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1371 // Unordered/Monotonic case.
1372 if (!InsertFencesForAtomic) {
1375 }
1376 }
1377
1378 // Compute supported atomic widths.
1379 if (Subtarget->isTargetLinux() ||
1380 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1381 // For targets where __sync_* routines are reliably available, we use them
1382 // if necessary.
1383 //
1384 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1385 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1386 //
1387 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1388 // such targets should provide __sync_* routines, which use the ARM mode
1389 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1390 // encoding; see ARMISD::MEMBARRIER_MCR.)
1392 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1393 Subtarget->hasForced32BitAtomics()) {
1394 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1396 } else {
1397 // We can't assume anything about other targets; just use libatomic
1398 // routines.
1400 }
1401
1403
1405
1406 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1407 if (!Subtarget->hasV6Ops()) {
1410 }
1412
1413 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1414 !Subtarget->isThumb1Only()) {
1415 // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
1416 // iff the target supports VFP2.
1426 }
1427
1428 // We want to custom lower some of our intrinsics.
1433 if (Subtarget->useSjLjEH())
1434 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1435
1445 if (Subtarget->hasFullFP16()) {
1449 }
1450
1452
1455 if (Subtarget->hasFullFP16())
1459 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1460
1461 // We don't support sin/cos/fmod/copysign/pow
1470 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1471 !Subtarget->isThumb1Only()) {
1474 }
1477
1478 if (!Subtarget->hasVFP4Base()) {
1481 }
1482
1483 // Various VFP goodness
1484 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1485 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1486 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1489 }
1490
1491 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1492 if (!Subtarget->hasFP16()) {
1495 }
1496
1497 // Strict floating-point comparisons need custom lowering.
1504 }
1505
1506 // Use __sincos_stret if available.
1507 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1508 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1511 }
1512
1513 // FP-ARMv8 implements a lot of rounding-like FP operations.
1514 if (Subtarget->hasFPARMv8Base()) {
1523 if (Subtarget->hasNEON()) {
1528 }
1529
1530 if (Subtarget->hasFP64()) {
1539 }
1540 }
1541
1542 // FP16 often need to be promoted to call lib functions
1543 if (Subtarget->hasFullFP16()) {
1558
1560 }
1561
1562 if (Subtarget->hasNEON()) {
1563 // vmin and vmax aren't available in a scalar form, so we can use
1564 // a NEON instruction with an undef lane instead.
1573
1574 if (Subtarget->hasFullFP16()) {
1579
1584 }
1585 }
1586
1587 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1588 // it, but it's just a wrapper around ldexp.
1589 if (Subtarget->isTargetWindows()) {
1591 if (isOperationExpand(Op, MVT::f32))
1592 setOperationAction(Op, MVT::f32, Promote);
1593 }
1594
1595 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1596 // isn't legal.
1598 if (isOperationExpand(Op, MVT::f16))
1599 setOperationAction(Op, MVT::f16, Promote);
1600
1601 // We have target-specific dag combine patterns for the following nodes:
1602 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1605
1606 if (Subtarget->hasMVEIntegerOps())
1608
1609 if (Subtarget->hasV6Ops())
1611 if (Subtarget->isThumb1Only())
1613 // Attempt to lower smin/smax to ssat/usat
1614 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1615 Subtarget->isThumb2()) {
1617 }
1618
1620
1621 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1622 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1624 else
1626
1627 //// temporary - rewrite interface to use type
1630 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1632 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1634
1635 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1636 // are at least 4 bytes aligned.
1638
1639 // Prefer likely predicted branches to selects on out-of-order cores.
1640 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1641
1644 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1645
1646 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1647}
1648
1650 return Subtarget->useSoftFloat();
1651}
1652
1653 // FIXME: It might make sense to define the representative register class as the
1654 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1655 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1656 // SPR's representative would be DPR_VFP2. This should work well if register
1657 // pressure tracking were modified such that a register use would increment the
1658 // pressure of the register class's representative and all of its super
1659 // classes' representatives transitively. We have not implemented this because
1660 // of the difficulty prior to coalescing of modeling operand register classes
1661 // due to the common occurrence of cross-class copies and subregister insertions
1662 // and extractions.
1663std::pair<const TargetRegisterClass *, uint8_t>
1665 MVT VT) const {
1666 const TargetRegisterClass *RRC = nullptr;
1667 uint8_t Cost = 1;
1668 switch (VT.SimpleTy) {
1669 default:
1671 // Use DPR as the representative register class for all floating-point
1672 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1673 // the cost is 1 for both f32 and f64.
1674 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1675 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1676 RRC = &ARM::DPRRegClass;
1677 // When NEON is used for SP, only half of the register file is available
1678 // because operations that define both SP and DP results will be constrained
1679 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1680 // coalescing by double-counting the SP regs. See the FIXME above.
1681 if (Subtarget->useNEONForSinglePrecisionFP())
1682 Cost = 2;
1683 break;
1684 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1685 case MVT::v4f32: case MVT::v2f64:
1686 RRC = &ARM::DPRRegClass;
1687 Cost = 2;
1688 break;
1689 case MVT::v4i64:
1690 RRC = &ARM::DPRRegClass;
1691 Cost = 4;
1692 break;
1693 case MVT::v8i64:
1694 RRC = &ARM::DPRRegClass;
1695 Cost = 8;
1696 break;
1697 }
1698 return std::make_pair(RRC, Cost);
1699}
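// Editorial note with a sketch (an assumption, not from the original source):
// the costs returned above roughly track how many 64-bit D registers a value
// of the given type occupies - 2 for the 128-bit Q-sized types, 4 for v4i64,
// 8 for v8i64. A back-of-the-envelope version of that relationship:
[[maybe_unused]] static uint8_t sketchApproxNumDRegs(MVT VT) {
  // Round the bit width up to a whole number of 64-bit D registers.
  return static_cast<uint8_t>((VT.getFixedSizeInBits() + 63) / 64);
}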
1700
1701const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1702#define MAKE_CASE(V) \
1703 case V: \
1704 return #V;
1705 switch ((ARMISD::NodeType)Opcode) {
1707 break;
1910#undef MAKE_CASE
1911 }
1912 return nullptr;
1913}
1914
1916 EVT VT) const {
1917 if (!VT.isVector())
1918 return getPointerTy(DL);
1919
1920 // MVE has a predicate register.
1921 if ((Subtarget->hasMVEIntegerOps() &&
1922 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1923 VT == MVT::v16i8)) ||
1924 (Subtarget->hasMVEFloatOps() &&
1925 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1926 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1928}
1929
1930/// getRegClassFor - Return the register class that should be used for the
1931/// specified value type.
1932const TargetRegisterClass *
1933ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1934 (void)isDivergent;
1935 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1936 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1937 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1938 // MVE Q registers.
1939 if (Subtarget->hasNEON()) {
1940 if (VT == MVT::v4i64)
1941 return &ARM::QQPRRegClass;
1942 if (VT == MVT::v8i64)
1943 return &ARM::QQQQPRRegClass;
1944 }
1945 if (Subtarget->hasMVEIntegerOps()) {
1946 if (VT == MVT::v4i64)
1947 return &ARM::MQQPRRegClass;
1948 if (VT == MVT::v8i64)
1949 return &ARM::MQQQQPRRegClass;
1950 }
1952}
1953
1954 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1955 // source/dest is aligned and the copy size is large enough. We therefore want
1956 // to align such objects passed to memory intrinsics.
1958 Align &PrefAlign) const {
1959 if (!isa<MemIntrinsic>(CI))
1960 return false;
1961 MinSize = 8;
1962 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1963 // cycle faster than 4-byte aligned LDM.
1964 PrefAlign =
1965 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1966 return true;
1967}
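// Editorial sketch (hypothetical caller, not in this file, assuming the usual
// IR headers such as llvm/IR/Instructions.h are available): IR-level codegen
// preparation can consult this hook and then raise the alignment of a local
// object fed to a memory intrinsic. The helper name is an assumption.
[[maybe_unused]] static void
sketchMaybeRealignMemIntrinsicArg(const TargetLowering &TLI, CallInst *CI,
                                  AllocaInst *AI) {
  unsigned MinSize = 0;
  Align PrefAlign;
  if (TLI.shouldAlignPointerArgs(CI, MinSize, PrefAlign) &&
      AI->getAlign() < PrefAlign)
    AI->setAlignment(PrefAlign); // bump the alloca to the preferred alignment
  (void)MinSize; // the real caller also checks the object is at least MinSize bytes
}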
1968
1969// Create a fast isel object.
1970FastISel *
1972 const TargetLibraryInfo *libInfo) const {
1973 return ARM::createFastISel(funcInfo, libInfo);
1974}
1975
1977 unsigned NumVals = N->getNumValues();
1978 if (!NumVals)
1979 return Sched::RegPressure;
1980
1981 for (unsigned i = 0; i != NumVals; ++i) {
1982 EVT VT = N->getValueType(i);
1983 if (VT == MVT::Glue || VT == MVT::Other)
1984 continue;
1985 if (VT.isFloatingPoint() || VT.isVector())
1986 return Sched::ILP;
1987 }
1988
1989 if (!N->isMachineOpcode())
1990 return Sched::RegPressure;
1991
1992 // Loads are scheduled for latency even if the instruction itinerary
1993 // is not available.
1994 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1995 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1996
1997 if (MCID.getNumDefs() == 0)
1998 return Sched::RegPressure;
1999 if (!Itins->isEmpty() &&
2000 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
2001 return Sched::ILP;
2002
2003 return Sched::RegPressure;
2004}
2005
2006//===----------------------------------------------------------------------===//
2007// Lowering Code
2008//===----------------------------------------------------------------------===//
2009
2010static bool isSRL16(const SDValue &Op) {
2011 if (Op.getOpcode() != ISD::SRL)
2012 return false;
2013 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2014 return Const->getZExtValue() == 16;
2015 return false;
2016}
2017
2018static bool isSRA16(const SDValue &Op) {
2019 if (Op.getOpcode() != ISD::SRA)
2020 return false;
2021 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2022 return Const->getZExtValue() == 16;
2023 return false;
2024}
2025
2026static bool isSHL16(const SDValue &Op) {
2027 if (Op.getOpcode() != ISD::SHL)
2028 return false;
2029 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2030 return Const->getZExtValue() == 16;
2031 return false;
2032}
2033
2034 // Check for a signed 16-bit value. We special-case SRA because it makes it
2035 // simpler when also looking for SRAs that aren't sign extending a
2036 // smaller value. Without the check, we'd need to take extra care with
2037 // checking order for some operations.
2038static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2039 if (isSRA16(Op))
2040 return isSHL16(Op.getOperand(0));
2041 return DAG.ComputeNumSignBits(Op) == 17;
2042}
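// Editorial sketch (plain C++, no LLVM dependencies): for an i32, having the
// top 17 bits all equal to the sign bit means the value is a sign-extended
// 16-bit quantity, which is what the sign-bit count above is testing for. The
// scalar equivalent is a round-trip through int16_t.
[[maybe_unused]] static bool sketchIsSignExtended16(int32_t V) {
  return V == static_cast<int32_t>(static_cast<int16_t>(V));
}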
2043
2044/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2046 switch (CC) {
2047 default: llvm_unreachable("Unknown condition code!");
2048 case ISD::SETNE: return ARMCC::NE;
2049 case ISD::SETEQ: return ARMCC::EQ;
2050 case ISD::SETGT: return ARMCC::GT;
2051 case ISD::SETGE: return ARMCC::GE;
2052 case ISD::SETLT: return ARMCC::LT;
2053 case ISD::SETLE: return ARMCC::LE;
2054 case ISD::SETUGT: return ARMCC::HI;
2055 case ISD::SETUGE: return ARMCC::HS;
2056 case ISD::SETULT: return ARMCC::LO;
2057 case ISD::SETULE: return ARMCC::LS;
2058 }
2059}
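// Editorial usage sketch (not in the original file): mapping an integer SETCC
// predicate to its ARM condition code, e.g. unsigned "less than" becomes LO
// (unsigned lower), matching the table above.
[[maybe_unused]] static void sketchIntCCToARMCCExample() {
  ARMCC::CondCodes CC = IntCCToARMCC(ISD::SETULT);
  assert(CC == ARMCC::LO && "unsigned less-than maps to LO");
  (void)CC;
}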
2060
2061/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2063 ARMCC::CondCodes &CondCode2) {
2064 CondCode2 = ARMCC::AL;
2065 switch (CC) {
2066 default: llvm_unreachable("Unknown FP condition!");
2067 case ISD::SETEQ:
2068 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2069 case ISD::SETGT:
2070 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2071 case ISD::SETGE:
2072 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2073 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2074 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2075 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2076 case ISD::SETO: CondCode = ARMCC::VC; break;
2077 case ISD::SETUO: CondCode = ARMCC::VS; break;
2078 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2079 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2080 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2081 case ISD::SETLT:
2082 case ISD::SETULT: CondCode = ARMCC::LT; break;
2083 case ISD::SETLE:
2084 case ISD::SETULE: CondCode = ARMCC::LE; break;
2085 case ISD::SETNE:
2086 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2087 }
2088}
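// Editorial usage sketch (not in the original file): an "ordered and not equal"
// comparison cannot be expressed with a single ARM condition, so the helper
// above reports a second condition through CondCode2 (MI then GT, per the
// table).
[[maybe_unused]] static void sketchFPCCToARMCCExample() {
  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(ISD::SETONE, CondCode, CondCode2);
  assert(CondCode == ARMCC::MI && CondCode2 == ARMCC::GT &&
         "SETONE needs two conditions");
  (void)CondCode;
  (void)CondCode2;
}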
2089
2090//===----------------------------------------------------------------------===//
2091// Calling Convention Implementation
2092//===----------------------------------------------------------------------===//
2093
2094/// getEffectiveCallingConv - Get the effective calling convention, taking into
2095/// account presence of floating point hardware and calling convention
2096/// limitations, such as support for variadic functions.
2098ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2099 bool isVarArg) const {
2100 switch (CC) {
2101 default:
2102 report_fatal_error("Unsupported calling convention");
2105 case CallingConv::GHC:
2107 return CC;
2113 case CallingConv::Swift:
2116 case CallingConv::C:
2117 case CallingConv::Tail:
2118 if (!Subtarget->isAAPCS_ABI())
2119 return CallingConv::ARM_APCS;
2120 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2121 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2122 !isVarArg)
2124 else
2126 case CallingConv::Fast:
2128 if (!Subtarget->isAAPCS_ABI()) {
2129 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2130 return CallingConv::Fast;
2131 return CallingConv::ARM_APCS;
2132 } else if (Subtarget->hasVFP2Base() &&
2133 !Subtarget->isThumb1Only() && !isVarArg)
2135 else
2137 }
2138}
2139
2141 bool isVarArg) const {
2142 return CCAssignFnForNode(CC, false, isVarArg);
2143}
2144
2146 bool isVarArg) const {
2147 return CCAssignFnForNode(CC, true, isVarArg);
2148}
2149
2150/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2151/// CallingConvention.
2152CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2153 bool Return,
2154 bool isVarArg) const {
2155 switch (getEffectiveCallingConv(CC, isVarArg)) {
2156 default:
2157 report_fatal_error("Unsupported calling convention");
2159 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2161 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2163 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2164 case CallingConv::Fast:
2165 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2166 case CallingConv::GHC:
2167 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2169 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2171 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2173 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2174 }
2175}
2176
2177SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2178 MVT LocVT, MVT ValVT, SDValue Val) const {
2179 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2180 Val);
2181 if (Subtarget->hasFullFP16()) {
2182 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2183 } else {
2184 Val = DAG.getNode(ISD::TRUNCATE, dl,
2185 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2186 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2187 }
2188 return Val;
2189}
2190
2191SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2192 MVT LocVT, MVT ValVT,
2193 SDValue Val) const {
2194 if (Subtarget->hasFullFP16()) {
2195 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2196 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2197 } else {
2198 Val = DAG.getNode(ISD::BITCAST, dl,
2199 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2200 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2201 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2202 }
2203 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2204}
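// Editorial sketch (plain C++): the ABI view that MoveToHPR / MoveFromHPR
// implement - an f16/bf16 value travels in the low 16 bits of a 32-bit
// location, and the upper 16 bits are either zeroed or ignored.
[[maybe_unused]] static uint32_t sketchPackHalfIntoWord(uint16_t HalfBits) {
  return HalfBits; // zero-extended into the 32-bit slot
}
[[maybe_unused]] static uint16_t sketchUnpackHalfFromWord(uint32_t WordBits) {
  return static_cast<uint16_t>(WordBits); // only the low 16 bits are meaningful
}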
2205
2206/// LowerCallResult - Lower the result values of a call into the
2207/// appropriate copies out of appropriate physical registers.
2208SDValue ARMTargetLowering::LowerCallResult(
2209 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2210 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2211 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2212 SDValue ThisVal, bool isCmseNSCall) const {
2213 // Assign locations to each value returned by this call.
2215 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2216 *DAG.getContext());
2217 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2218
2219 // Copy all of the result registers out of their specified physreg.
2220 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2221 CCValAssign VA = RVLocs[i];
2222
2223 // Pass 'this' value directly from the argument to return value, to avoid
2224 // reg unit interference
2225 if (i == 0 && isThisReturn) {
2226 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2227 "unexpected return calling convention register assignment");
2228 InVals.push_back(ThisVal);
2229 continue;
2230 }
2231
2232 SDValue Val;
2233 if (VA.needsCustom() &&
2234 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2235 // Handle f64 or half of a v2f64.
2236 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2237 InGlue);
2238 Chain = Lo.getValue(1);
2239 InGlue = Lo.getValue(2);
2240 VA = RVLocs[++i]; // skip ahead to next loc
2241 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2242 InGlue);
2243 Chain = Hi.getValue(1);
2244 InGlue = Hi.getValue(2);
2245 if (!Subtarget->isLittle())
2246 std::swap (Lo, Hi);
2247 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2248
2249 if (VA.getLocVT() == MVT::v2f64) {
2250 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2251 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2252 DAG.getConstant(0, dl, MVT::i32));
2253
2254 VA = RVLocs[++i]; // skip ahead to next loc
2255 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2256 Chain = Lo.getValue(1);
2257 InGlue = Lo.getValue(2);
2258 VA = RVLocs[++i]; // skip ahead to next loc
2259 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2260 Chain = Hi.getValue(1);
2261 InGlue = Hi.getValue(2);
2262 if (!Subtarget->isLittle())
2263 std::swap (Lo, Hi);
2264 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2265 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2266 DAG.getConstant(1, dl, MVT::i32));
2267 }
2268 } else {
2269 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2270 InGlue);
2271 Chain = Val.getValue(1);
2272 InGlue = Val.getValue(2);
2273 }
2274
2275 switch (VA.getLocInfo()) {
2276 default: llvm_unreachable("Unknown loc info!");
2277 case CCValAssign::Full: break;
2278 case CCValAssign::BCvt:
2279 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2280 break;
2281 }
2282
2283 // f16 arguments have their size extended to 4 bytes and passed as if they
2284 // had been copied to the LSBs of a 32-bit register.
2285 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2286 if (VA.needsCustom() &&
2287 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2288 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2289
2290 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2291 // is less than 32 bits must be sign- or zero-extended after the call for
2292 // security reasons. Although the ABI mandates an extension done by the
2293 // callee, the latter cannot be trusted to follow the rules of the ABI.
2294 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2295 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2296 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2297 Val = handleCMSEValue(Val, Arg, DAG, dl);
2298
2299 InVals.push_back(Val);
2300 }
2301
2302 return Chain;
2303}
2304
2305std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2306 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2307 bool IsTailCall, int SPDiff) const {
2308 SDValue DstAddr;
2309 MachinePointerInfo DstInfo;
2310 int32_t Offset = VA.getLocMemOffset();
2312
2313 if (IsTailCall) {
2314 Offset += SPDiff;
2315 auto PtrVT = getPointerTy(DAG.getDataLayout());
2316 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2317 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2318 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2319 DstInfo =
2321 } else {
2322 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2323 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2324 StackPtr, PtrOff);
2325 DstInfo =
2327 }
2328
2329 return std::make_pair(DstAddr, DstInfo);
2330}
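// Editorial sketch (not in the original file): the two addressing cases above
// reduced to their offset arithmetic - for a tail call the argument is written
// into the caller's incoming-argument area at Offset + SPDiff, otherwise it is
// stored Offset bytes above the outgoing stack pointer.
[[maybe_unused]] static int64_t sketchCallArgOffset(int64_t LocMemOffset,
                                                    bool IsTailCall,
                                                    int SPDiff) {
  return IsTailCall ? LocMemOffset + SPDiff : LocMemOffset;
}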
2331
2332 // Returns the type of copying which is required to set up a byval argument to
2333 // a tail-called function. This isn't needed for non-tail calls, because they
2334 // always need the equivalent of CopyOnce, but tail-calls sometimes need two
2335 // copies to avoid clobbering another argument (CopyViaTemp), and sometimes can
2336 // be optimised to zero copies when forwarding an argument from the caller's
2337 // caller (NoCopy).
2338ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2339 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2342
2343 // Globals are always safe to copy from.
2344 if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2345 return CopyOnce;
2346
2347 // Can only analyse frame index nodes, conservatively assume we need a
2348 // temporary.
2349 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2350 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2351 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2352 return CopyViaTemp;
2353
2354 int SrcFI = SrcFrameIdxNode->getIndex();
2355 int DstFI = DstFrameIdxNode->getIndex();
2356 assert(MFI.isFixedObjectIndex(DstFI) &&
2357 "byval passed in non-fixed stack slot");
2358
2359 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2360 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2361
2362 // If the source is in the local frame, then the copy to the argument memory
2363 // is always valid.
2364 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2365 if (!FixedSrc ||
2366 (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2367 return CopyOnce;
2368
2369 // In the case of byval arguments split between registers and the stack,
2370 // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2371 // stack portion, but the Src SDValue will refer to the full value, including
2372 // the local stack memory that the register portion gets stored into. We only
2373 // need to compare them for equality, so normalise on the full value version.
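// For example (illustrative sizes): a 16-byte byval split as 8 bytes in
// r2-r3 plus 8 bytes on the stack has MFI.getObjectSize(DstFI) == 8, so
// RegSize == 8 and DstOffset is moved back by 8 to the start of the full
// value before being compared against SrcOffset.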
2374 uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2375 DstOffset -= RegSize;
2376
2377 // If the value is already in the correct location, then no copying is
2378 // needed. If not, then we need to copy via a temporary.
2379 if (SrcOffset == DstOffset)
2380 return NoCopy;
2381 else
2382 return CopyViaTemp;
2383}
2384
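/// PassF64ArgInRegs - Split an f64 argument into two i32 halves with
/// ARMISD::VMOVRRD and pass the first half in VA's register. The second half
/// goes either into NextVA's register or, when NextVA is a memory location,
/// into the stack slot computed by computeAddrForCallArg.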
2385void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2386 SDValue Chain, SDValue &Arg,
2387 RegsToPassVector &RegsToPass,
2388 CCValAssign &VA, CCValAssign &NextVA,
2389 SDValue &StackPtr,
2390 SmallVectorImpl<SDValue> &MemOpChains,
2391 bool IsTailCall,
2392 int SPDiff) const {
2393 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2394 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2395 unsigned id = Subtarget->isLittle() ? 0 : 1;
2396 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2397
2398 if (NextVA.isRegLoc())
2399 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2400 else {
2401 assert(NextVA.isMemLoc());
2402 if (!StackPtr.getNode())
2403 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2404 getPointerTy(DAG.getDataLayout()));
2405
2406 SDValue DstAddr;
2407 MachinePointerInfo DstInfo;
2408 std::tie(DstAddr, DstInfo) =
2409 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2410 MemOpChains.push_back(
2411 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2412 }
2413}
2414
2415static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2416 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2417 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2418 }
2419
2420/// LowerCall - Lowering a call into a callseq_start <-
2421/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2422/// nodes.
2423SDValue
2424ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2425 SmallVectorImpl<SDValue> &InVals) const {
2426 SelectionDAG &DAG = CLI.DAG;
2427 SDLoc &dl = CLI.DL;
2428 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2429 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2430 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2431 SDValue Chain = CLI.Chain;
2432 SDValue Callee = CLI.Callee;
2433 bool &isTailCall = CLI.IsTailCall;
2434 CallingConv::ID CallConv = CLI.CallConv;
2435 bool doesNotRet = CLI.DoesNotReturn;
2436 bool isVarArg = CLI.IsVarArg;
2437
2438 MachineFunction &MF = DAG.getMachineFunction();
2439 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2440 MachineFrameInfo &MFI = MF.getFrameInfo();
2441 MachineFunction::CallSiteInfo CSInfo;
2442 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2443 bool isThisReturn = false;
2444 bool isCmseNSCall = false;
2445 bool isSibCall = false;
2446 bool PreferIndirect = false;
2447 bool GuardWithBTI = false;
2448
2449 // Analyze operands of the call, assigning locations to each operand.
2450 SmallVector<CCValAssign, 16> ArgLocs;
2451 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2452 *DAG.getContext());
2453 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2454
2455 // Lower 'returns_twice' calls to a pseudo-instruction.
2456 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2457 !Subtarget->noBTIAtReturnTwice())
2458 GuardWithBTI = AFI->branchTargetEnforcement();
2459
2460 // Determine whether this is a non-secure function call.
2461 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2462 isCmseNSCall = true;
2463
2464 // Disable tail calls if they're not supported.
2465 if (!Subtarget->supportsTailCall())
2466 isTailCall = false;
2467
2468 // For both the non-secure calls and the returns from a CMSE entry function,
2469 // the function needs to do some extra work after the call, or before the
2470 // return, respectively, thus it cannot end with a tail call
2471 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2472 isTailCall = false;
2473
2474 if (isa<GlobalAddressSDNode>(Callee)) {
2475 // If we're optimizing for minimum size and the function is called three or
2476 // more times in this block, we can improve codesize by calling indirectly
2477 // as BLXr has a 16-bit encoding.
2478 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2479 if (CLI.CB) {
2480 auto *BB = CLI.CB->getParent();
2481 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2482 count_if(GV->users(), [&BB](const User *U) {
2483 return isa<Instruction>(U) &&
2484 cast<Instruction>(U)->getParent() == BB;
2485 }) > 2;
2486 }
2487 }
2488 if (isTailCall) {
2489 // Check if it's really possible to do a tail call.
2490 isTailCall =
2491 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2492
2493 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2494 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2495 isSibCall = true;
2496
2497 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2498 // detected sibcalls.
2499 if (isTailCall)
2500 ++NumTailCalls;
2501 }
2502
2503 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2504 report_fatal_error("failed to perform tail call elimination on a call "
2505 "site marked musttail");
2506
2507 // Get a count of how many bytes are to be pushed on the stack.
2508 unsigned NumBytes = CCInfo.getStackSize();
2509
2510 // SPDiff is the byte offset of the call's argument area from the callee's.
2511 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2512 // by this amount for a tail call. In a sibling call it must be 0 because the
2513 // caller will deallocate the entire stack and the callee still expects its
2514 // arguments to begin at SP+0. Completely unused for non-tail calls.
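// Illustrative example (hypothetical sizes): if the caller received 16 bytes
// of stack arguments (NumReusableBytes) and this tail call needs 32 bytes
// (NumBytes after 16-byte alignment), SPDiff is -16 and FrameLowering must
// reserve those extra 16 bytes via setArgRegsSaveSize below.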
2515 int SPDiff = 0;
2516
2517 if (isTailCall && !isSibCall) {
2518 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2519 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2520
2521 // Since callee will pop argument stack as a tail call, we must keep the
2522 // popped size 16-byte aligned.
2523 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
2524 assert(StackAlign && "data layout string is missing stack alignment");
2525 NumBytes = alignTo(NumBytes, *StackAlign);
2526
2527 // SPDiff will be negative if this tail call requires more space than we
2528 // would automatically have in our incoming argument space. Positive if we
2529 // can actually shrink the stack.
2530 SPDiff = NumReusableBytes - NumBytes;
2531
2532 // If this call requires more stack than we have available from
2533 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2534 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2535 AFI->setArgRegsSaveSize(-SPDiff);
2536 }
2537
2538 if (isSibCall) {
2539 // For sibling tail calls, memory operands are available in our caller's stack.
2540 NumBytes = 0;
2541 } else {
2542 // Adjust the stack pointer for the new arguments...
2543 // These operations are automatically eliminated by the prolog/epilog pass
2544 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2545 }
2546
2547 SDValue StackPtr =
2548 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2549
2550 RegsToPassVector RegsToPass;
2551 SmallVector<SDValue, 8> MemOpChains;
2552
2553 // If we are doing a tail-call, any byval arguments will be written to stack
2554 // space which was used for incoming arguments. If any of the values being used
2555 // are incoming byval arguments to this function, then they might be
2556 // overwritten by the stores of the outgoing arguments. To avoid this, we
2557 // need to make a temporary copy of them in local stack space, then copy back
2558 // to the argument area.
2559 DenseMap<unsigned, SDValue> ByValTemporaries;
2560 SDValue ByValTempChain;
2561 if (isTailCall) {
2562 SmallVector<SDValue, 8> ByValCopyChains;
2563 for (const CCValAssign &VA : ArgLocs) {
2564 unsigned ArgIdx = VA.getValNo();
2565 SDValue Src = OutVals[ArgIdx];
2566 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2567
2568 if (!Flags.isByVal())
2569 continue;
2570
2571 SDValue Dst;
2572 MachinePointerInfo DstInfo;
2573 std::tie(Dst, DstInfo) =
2574 computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2575 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2576
2577 if (Copy == NoCopy) {
2578 // If the argument is already at the correct offset on the stack
2579 // (because we are forwarding a byval argument from our caller), we
2580 // don't need any copying.
2581 continue;
2582 } else if (Copy == CopyOnce) {
2583 // If the argument is in our local stack frame, no other argument
2584 // preparation can clobber it, so we can copy it to the final location
2585 // later.
2586 ByValTemporaries[ArgIdx] = Src;
2587 } else {
2588 assert(Copy == CopyViaTemp && "unexpected enum value");
2589 // If we might be copying this argument from the outgoing argument
2590 // stack area, we need to copy via a temporary in the local stack
2591 // frame.
2592 int TempFrameIdx = MFI.CreateStackObject(
2593 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2594 SDValue Temp =
2595 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2596
2597 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2598 SDValue AlignNode =
2599 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2600
2601 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2602 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2603 ByValCopyChains.push_back(
2604 DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2605 ByValTemporaries[ArgIdx] = Temp;
2606 }
2607 }
2608 if (!ByValCopyChains.empty())
2609 ByValTempChain =
2610 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2611 }
2612
2613 // During a tail call, stores to the argument area must happen after all of
2614 // the function's incoming arguments have been loaded because they may alias.
2615 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2616 // there's no point in doing so repeatedly so this tracks whether that's
2617 // happened yet.
2618 bool AfterFormalArgLoads = false;
2619
2620 // Walk the register/memloc assignments, inserting copies/loads. In the case
2621 // of tail call optimization, arguments are handled later.
2622 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2623 i != e;
2624 ++i, ++realArgIdx) {
2625 CCValAssign &VA = ArgLocs[i];
2626 SDValue Arg = OutVals[realArgIdx];
2627 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2628 bool isByVal = Flags.isByVal();
2629
2630 // Promote the value if needed.
2631 switch (VA.getLocInfo()) {
2632 default: llvm_unreachable("Unknown loc info!");
2633 case CCValAssign::Full: break;
2634 case CCValAssign::SExt:
2635 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2636 break;
2637 case CCValAssign::ZExt:
2638 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2639 break;
2640 case CCValAssign::AExt:
2641 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2642 break;
2643 case CCValAssign::BCvt:
2644 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2645 break;
2646 }
2647
2648 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2649 Chain = DAG.getStackArgumentTokenFactor(Chain);
2650 if (ByValTempChain)
2651 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
2652 ByValTempChain);
2653 AfterFormalArgLoads = true;
2654 }
2655
2656 // f16 arguments have their size extended to 4 bytes and passed as if they
2657 // had been copied to the LSBs of a 32-bit register.
2658 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI)
2659 if (VA.needsCustom() &&
2660 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2661 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2662 } else {
2663 // f16 arguments could have been extended prior to argument lowering.
2664 // Mask these arguments if this is a CMSE nonsecure call.
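// For example, for an f16 passed in a 32-bit location, MaskValue keeps only
// the low 16 bits, so the AND below clears the upper half of the register
// and no stale data is leaked to the non-secure callee.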
2665 auto ArgVT = Outs[realArgIdx].ArgVT;
2666 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2667 auto LocBits = VA.getLocVT().getSizeInBits();
2668 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2669 SDValue Mask =
2670 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2671 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2672 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2673 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2674 }
2675 }
2676
2677 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2678 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2679 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2680 DAG.getConstant(0, dl, MVT::i32));
2681 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2682 DAG.getConstant(1, dl, MVT::i32));
2683
2684 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2685 StackPtr, MemOpChains, isTailCall, SPDiff);
2686
2687 VA = ArgLocs[++i]; // skip ahead to next loc
2688 if (VA.isRegLoc()) {
2689 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2690 StackPtr, MemOpChains, isTailCall, SPDiff);
2691 } else {
2692 assert(VA.isMemLoc());
2693 SDValue DstAddr;
2694 MachinePointerInfo DstInfo;
2695 std::tie(DstAddr, DstInfo) =
2696 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2697 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2698 }
2699 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2700 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2701 StackPtr, MemOpChains, isTailCall, SPDiff);
2702 } else if (VA.isRegLoc()) {
2703 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2704 Outs[0].VT == MVT::i32) {
2705 assert(VA.getLocVT() == MVT::i32 &&
2706 "unexpected calling convention register assignment");
2707 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2708 "unexpected use of 'returned'");
2709 isThisReturn = true;
2710 }
2711 const TargetOptions &Options = DAG.getTarget().Options;
2712 if (Options.EmitCallSiteInfo)
2713 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2714 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2715 } else if (isByVal) {
2716 assert(VA.isMemLoc());
2717 unsigned offset = 0;
2718
2719 // True if this byval aggregate will be split between registers
2720 // and memory.
2721 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2722 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2723
2724 SDValue ByValSrc;
2725 bool NeedsStackCopy;
2726 if (ByValTemporaries.contains(realArgIdx)) {
2727 ByValSrc = ByValTemporaries[realArgIdx];
2728 NeedsStackCopy = true;
2729 } else {
2730 ByValSrc = Arg;
2731 NeedsStackCopy = !isTailCall;
2732 }
2733
2734 // If part of the argument is in registers, load them.
2735 if (CurByValIdx < ByValArgsCount) {
2736 unsigned RegBegin, RegEnd;
2737 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2738
2739 EVT PtrVT =
2740 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2741 unsigned int i, j;
2742 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2743 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2744 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
2745 SDValue Load =
2746 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2747 DAG.InferPtrAlign(AddArg));
2748 MemOpChains.push_back(Load.getValue(1));
2749 RegsToPass.push_back(std::make_pair(j, Load));
2750 }
2751
2752 // If the parameter size exceeds the register area, the "offset" value
2753 // helps us calculate the stack slot for the remaining part properly.
2754 offset = RegEnd - RegBegin;
2755
2756 CCInfo.nextInRegsParam();
2757 }
2758
2759 // If the memory part of the argument isn't already in the correct place
2760 // (which can happen with tail calls), copy it into the argument area.
2761 if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2762 auto PtrVT = getPointerTy(DAG.getDataLayout());
2763 SDValue Dst;
2764 MachinePointerInfo DstInfo;
2765 std::tie(Dst, DstInfo) =
2766 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2767 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2768 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
2769 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2770 MVT::i32);
2771 SDValue AlignNode =
2772 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2773
2774 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2775 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2776 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2777 Ops));
2778 }
2779 } else {
2780 assert(VA.isMemLoc());
2781 SDValue DstAddr;
2782 MachinePointerInfo DstInfo;
2783 std::tie(DstAddr, DstInfo) =
2784 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2785
2786 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2787 MemOpChains.push_back(Store);
2788 }
2789 }
2790
2791 if (!MemOpChains.empty())
2792 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2793
2794 // Build a sequence of copy-to-reg nodes chained together with token chain
2795 // and flag operands which copy the outgoing args into the appropriate regs.
2796 SDValue InGlue;
2797 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2798 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2799 RegsToPass[i].second, InGlue);
2800 InGlue = Chain.getValue(1);
2801 }
2802
2803 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2804 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2805 // node so that legalize doesn't hack it.
2806 bool isDirect = false;
2807 const TargetMachine &TM = getTargetMachine();
2808
2809 const GlobalValue *GVal = nullptr;
2810 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2811 GVal = G->getGlobal();
2812 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2813
2814 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2815 bool isLocalARMFunc = false;
2816 auto PtrVt = getPointerTy(DAG.getDataLayout());
2817
2818 if (Subtarget->genLongCalls()) {
2819 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2820 "long-calls codegen is not position independent!");
2821 // Handle a global address or an external symbol. If it's not one of
2822 // those, the target's already in a register, so we don't need to do
2823 // anything extra.
2824 if (isa<GlobalAddressSDNode>(Callee)) {
2825 if (Subtarget->genExecuteOnly()) {
2826 if (Subtarget->useMovt())
2827 ++NumMovwMovt;
2828 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2829 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2830 } else {
2831 // Create a constant pool entry for the callee address
2832 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2833 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2834 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2835
2836 // Get the address of the callee into a register
2837 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2838 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2839 Callee = DAG.getLoad(
2840 PtrVt, dl, DAG.getEntryNode(), Addr,
2841 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2842 }
2843 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2844 const char *Sym = S->getSymbol();
2845
2846 if (Subtarget->genExecuteOnly()) {
2847 if (Subtarget->useMovt())
2848 ++NumMovwMovt;
2849 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2850 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2851 } else {
2852 // Create a constant pool entry for the callee address
2853 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2854 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2855 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2856
2857 // Get the address of the callee into a register
2858 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2859 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2860 Callee = DAG.getLoad(
2861 PtrVt, dl, DAG.getEntryNode(), Addr,
2862 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2863 }
2864 }
2865 } else if (isa<GlobalAddressSDNode>(Callee)) {
2866 if (!PreferIndirect) {
2867 isDirect = true;
2868 bool isDef = GVal->isStrongDefinitionForLinker();
2869
2870 // ARM call to a local ARM function is predicable.
2871 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2872 // tBX takes a register source operand.
2873 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2874 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2875 Callee = DAG.getNode(
2876 ARMISD::WrapperPIC, dl, PtrVt,
2877 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2878 Callee = DAG.getLoad(
2879 PtrVt, dl, DAG.getEntryNode(), Callee,
2883 } else if (Subtarget->isTargetCOFF()) {
2884 assert(Subtarget->isTargetWindows() &&
2885 "Windows is the only supported COFF target");
2886 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2887 if (GVal->hasDLLImportStorageClass())
2888 TargetFlags = ARMII::MO_DLLIMPORT;
2889 else if (!TM.shouldAssumeDSOLocal(GVal))
2890 TargetFlags = ARMII::MO_COFFSTUB;
2891 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2892 TargetFlags);
2893 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2894 Callee =
2895 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2896 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2897 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2898 } else {
2899 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2900 }
2901 }
2902 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2903 isDirect = true;
2904 // tBX takes a register source operand.
2905 const char *Sym = S->getSymbol();
2906 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2907 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2908 ARMConstantPoolValue *CPV =
2909 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2910 ARMPCLabelIndex, 4);
2911 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2912 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2913 Callee = DAG.getLoad(
2914 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2915 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2916 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2917 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2918 } else {
2919 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2920 }
2921 }
2922
2923 if (isCmseNSCall) {
2924 assert(!isARMFunc && !isDirect &&
2925 "Cannot handle call to ARM function or direct call");
2926 if (NumBytes > 0) {
2928 "call to non-secure function would "
2929 "require passing arguments on stack",
2930 dl.getDebugLoc());
2931 DAG.getContext()->diagnose(Diag);
2932 }
2933 if (isStructRet) {
2936 "call to non-secure function would return value through pointer",
2937 dl.getDebugLoc());
2938 DAG.getContext()->diagnose(Diag);
2939 }
2940 }
2941
2942 // FIXME: handle tail calls differently.
2943 unsigned CallOpc;
2944 if (Subtarget->isThumb()) {
2945 if (GuardWithBTI)
2946 CallOpc = ARMISD::t2CALL_BTI;
2947 else if (isCmseNSCall)
2948 CallOpc = ARMISD::tSECALL;
2949 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2950 CallOpc = ARMISD::CALL_NOLINK;
2951 else
2952 CallOpc = ARMISD::CALL;
2953 } else {
2954 if (!isDirect && !Subtarget->hasV5TOps())
2955 CallOpc = ARMISD::CALL_NOLINK;
2956 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2957 // Emit regular call when code size is the priority
2958 !Subtarget->hasMinSize())
2959 // "mov lr, pc; b _foo" to avoid confusing the RSP
2960 CallOpc = ARMISD::CALL_NOLINK;
2961 else
2962 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2963 }
2964
2965 // We don't usually want to end the call-sequence here because we would tidy
2966 // the frame up *after* the call, however in the ABI-changing tail-call case
2967 // we've carefully laid out the parameters so that when sp is reset they'll be
2968 // in the correct location.
2969 if (isTailCall && !isSibCall) {
2970 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2971 InGlue = Chain.getValue(1);
2972 }
2973
2974 std::vector<SDValue> Ops;
2975 Ops.push_back(Chain);
2976 Ops.push_back(Callee);
2977
2978 if (isTailCall) {
2979 Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
2980 }
2981
2982 // Add argument registers to the end of the list so that they are known live
2983 // into the call.
2984 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2985 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2986 RegsToPass[i].second.getValueType()));
2987
2988 // Add a register mask operand representing the call-preserved registers.
2989 const uint32_t *Mask;
2990 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2991 if (isThisReturn) {
2992 // For 'this' returns, use the R0-preserving mask if applicable
2993 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2994 if (!Mask) {
2995 // Set isThisReturn to false if the calling convention is not one that
2996 // allows 'returned' to be modeled in this way, so LowerCallResult does
2997 // not try to pass 'this' straight through
2998 isThisReturn = false;
2999 Mask = ARI->getCallPreservedMask(MF, CallConv);
3000 }
3001 } else
3002 Mask = ARI->getCallPreservedMask(MF, CallConv);
3003
3004 assert(Mask && "Missing call preserved mask for calling convention");
3005 Ops.push_back(DAG.getRegisterMask(Mask));
3006
3007 if (InGlue.getNode())
3008 Ops.push_back(InGlue);
3009
3010 if (isTailCall) {
3011 MF.getFrameInfo().setHasTailCall();
3012 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
3013 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
3014 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
3015 return Ret;
3016 }
3017
3018 // Returns a chain and a flag for retval copy to use.
3019 Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
3020 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3021 InGlue = Chain.getValue(1);
3022 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
3023
3024 // If we're guaranteeing tail-calls will be honoured, the callee must
3025 // pop its own argument stack on return. But this call is *not* a tail call so
3026 // we need to undo that after it returns to restore the status-quo.
3027 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
3028 uint64_t CalleePopBytes =
3029 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
3030
3031 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
3032 if (!Ins.empty())
3033 InGlue = Chain.getValue(1);
3034
3035 // Handle result values, copying them out of physregs into vregs that we
3036 // return.
3037 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
3038 InVals, isThisReturn,
3039 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
3040}
3041
3042/// HandleByVal - Every parameter *after* a byval parameter is passed
3043/// on the stack. Remember the next parameter register to allocate,
3044 /// and then confiscate the rest of the parameter registers to ensure
3045/// this.
3046void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
3047 Align Alignment) const {
3048 // Byval (as with any stack) slots are always at least 4 byte aligned.
3049 Alignment = std::max(Alignment, Align(4));
3050
3051 MCRegister Reg = State->AllocateReg(GPRArgRegs);
3052 if (!Reg)
3053 return;
3054
3055 unsigned AlignInRegs = Alignment.value() / 4;
3056 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
3057 for (unsigned i = 0; i < Waste; ++i)
3058 Reg = State->AllocateReg(GPRArgRegs);
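// Illustrative example (hypothetical values): for an 8-byte-aligned byval
// whose first free register is r1, AlignInRegs is 2 and Waste is 1, so r1 is
// consumed as padding and the byval starts in the aligned pair r2:r3.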
3059
3060 if (!Reg)
3061 return;
3062
3063 unsigned Excess = 4 * (ARM::R4 - Reg);
3064
3065 // Special case when NSAA != SP and the parameter size is greater than the
3066 // size of all remaining GPR regs. In that case we can't split the parameter;
3067 // we must send it to the stack. We also must set NCRN to R4, so all
3068 // remaining registers are wasted.
3069 const unsigned NSAAOffset = State->getStackSize();
3070 if (NSAAOffset != 0 && Size > Excess) {
3071 while (State->AllocateReg(GPRArgRegs))
3072 ;
3073 return;
3074 }
3075
3076 // The first register for the byval parameter is the first register that
3077 // wasn't allocated before this method call, so it would be "reg".
3078 // If the parameter is small enough to be saved in the range [reg, r4), then
3079 // the end (first-past-the-last) register would be reg + param-size-in-regs;
3080 // otherwise the parameter is split between registers and the stack, and the
3081 // end register is r4 in that case.
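// Worked example (illustrative): a 12-byte byval whose first register is r2
// gets ByValRegBegin = r2 and ByValRegEnd = r4 (only r2 and r3 are
// available), so Excess is 8 and the Size left in memory becomes 4 bytes.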
3082 unsigned ByValRegBegin = Reg;
3083 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
3084 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
3085 // Note that the first register was already allocated at the beginning of
3086 // this function; allocate the remaining registers we need here.
3087 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
3088 State->AllocateReg(GPRArgRegs);
3089 // A byval parameter that is split between registers and memory needs its
3090 // size truncated here.
3091 // In the case where the entire structure fits in registers, we set the
3092 // size in memory to zero.
3093 Size = std::max<int>(Size - Excess, 0);
3094}
3095
3096/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3097/// for tail call optimization. Targets which want to do tail call
3098/// optimization should implement this function. Note that this function also
3099/// processes musttail calls, so when this function returns false on a valid
3100/// musttail call, a fatal backend error occurs.
3101bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3103 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3104 CallingConv::ID CalleeCC = CLI.CallConv;
3105 SDValue Callee = CLI.Callee;
3106 bool isVarArg = CLI.IsVarArg;
3107 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3108 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3109 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3110 const SelectionDAG &DAG = CLI.DAG;
3111 MachineFunction &MF = DAG.getMachineFunction();
3112 const Function &CallerF = MF.getFunction();
3113 CallingConv::ID CallerCC = CallerF.getCallingConv();
3114
3115 assert(Subtarget->supportsTailCall());
3116
3117 // Indirect tail-calls require a register to hold the target address. That
3118 // register must be:
3119 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
3120 // * Not callee-saved, so must be one of r0-r3 or r12.
3121 // * Not used to hold an argument to the tail-called function, which might be
3122 // in r0-r3.
3123 // * Not used to hold the return address authentication code, which is in r12
3124 // if enabled.
3125 // Sometimes, no register matches all of these conditions, so we can't do a
3126 // tail-call.
3127 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3128 SmallSet<MCPhysReg, 5> AddressRegisters;
3129 for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3130 AddressRegisters.insert(R);
3131 if (!(Subtarget->isThumb1Only() ||
3132 MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
3133 AddressRegisters.insert(ARM::R12);
3134 for (const CCValAssign &AL : ArgLocs)
3135 if (AL.isRegLoc())
3136 AddressRegisters.erase(AL.getLocReg());
3137 if (AddressRegisters.empty()) {
3138 LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
3139 return false;
3140 }
3141 }
3142
3143 // Look for obvious safe cases to perform tail call optimization that do not
3144 // require ABI changes. This is what gcc calls sibcall.
3145
3146 // Exception-handling functions need a special set of instructions to indicate
3147 // a return to the hardware. Tail-calling another function would probably
3148 // break this.
3149 if (CallerF.hasFnAttribute("interrupt")) {
3150 LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
3151 return false;
3152 }
3153
3154 if (canGuaranteeTCO(CalleeCC,
3155 getTargetMachine().Options.GuaranteedTailCallOpt)) {
3156 LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
3157 << " (guaranteed tail-call CC)\n");
3158 return CalleeCC == CallerCC;
3159 }
3160
3161 // Also avoid sibcall optimization if either caller or callee uses struct
3162 // return semantics.
3163 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3164 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3165 if (isCalleeStructRet != isCallerStructRet) {
3166 LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
3167 return false;
3168 }
3169
3170 // Externally-defined functions with weak linkage should not be
3171 // tail-called on ARM when the OS does not support dynamic
3172 // pre-emption of symbols, as the AAELF spec requires normal calls
3173 // to undefined weak functions to be replaced with a NOP or jump to the
3174 // next instruction. The behaviour of branch instructions in this
3175 // situation (as used for tail calls) is implementation-defined, so we
3176 // cannot rely on the linker replacing the tail call with a return.
3177 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3178 const GlobalValue *GV = G->getGlobal();
3179 const Triple &TT = getTargetMachine().getTargetTriple();
3180 if (GV->hasExternalWeakLinkage() &&
3181 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3182 TT.isOSBinFormatMachO())) {
3183 LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
3184 return false;
3185 }
3186 }
3187
3188 // Check that the call results are passed in the same way.
3189 LLVMContext &C = *DAG.getContext();
3190 if (!CCState::resultsCompatible(
3191 getEffectiveCallingConv(CalleeCC, isVarArg),
3192 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3193 CCAssignFnForReturn(CalleeCC, isVarArg),
3194 CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3195 LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
3196 return false;
3197 }
3198 // The callee has to preserve all registers the caller needs to preserve.
3199 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3200 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3201 if (CalleeCC != CallerCC) {
3202 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3203 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3204 LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
3205 return false;
3206 }
3207 }
3208
3209 // If Caller's vararg argument has been split between registers and stack, do
3210 // not perform tail call, since part of the argument is in caller's local
3211 // frame.
3212 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3213 if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
3214 LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
3215 return false;
3216 }
3217
3218 // If the callee takes no arguments then go on to check the results of the
3219 // call.
3220 const MachineRegisterInfo &MRI = MF.getRegInfo();
3221 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3222 LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
3223 return false;
3224 }
3225
3226 // If the stack arguments for this call do not fit into our own save area then
3227 // the call cannot be made tail.
3228 if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
3229 return false;
3230
3231 LLVM_DEBUG(dbgs() << "true\n");
3232 return true;
3233}
3234
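/// CanLowerReturn - Run the return-value calling convention over Outs and
/// report whether every value can be assigned a location; the generic call
/// lowering uses this to fall back to returning indirectly (sret-style) when
/// the values do not all fit in return registers.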
3235bool
3236ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3237 MachineFunction &MF, bool isVarArg,
3238 const SmallVectorImpl<ISD::OutputArg> &Outs,
3239 LLVMContext &Context) const {
3240 SmallVector<CCValAssign, 16> RVLocs;
3241 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3242 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3243}
3244
3245 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3246 const SDLoc &DL, SelectionDAG &DAG) {
3247 const MachineFunction &MF = DAG.getMachineFunction();
3248 const Function &F = MF.getFunction();
3249
3250 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3251
3252 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3253 // version of the "preferred return address". These offsets affect the return
3254 // instruction if this is a return from PL1 without hypervisor extensions.
3255 // IRQ/FIQ: +4 "subs pc, lr, #4"
3256 // SWI: 0 "subs pc, lr, #0"
3257 // ABORT: +4 "subs pc, lr, #4"
3258 // UNDEF: +4/+2 "subs pc, lr, #0"
3259 // UNDEF varies depending on where the exception came from ARM or Thumb
3260 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3261
3262 int64_t LROffset;
3263 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3264 IntKind == "ABORT")
3265 LROffset = 4;
3266 else if (IntKind == "SWI" || IntKind == "UNDEF")
3267 LROffset = 0;
3268 else
3269 report_fatal_error("Unsupported interrupt attribute. If present, value "
3270 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3271
3272 RetOps.insert(RetOps.begin() + 1,
3273 DAG.getConstant(LROffset, DL, MVT::i32, false));
3274
3275 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3276}
3277
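/// LowerReturn - Copy each outgoing return value into its assigned return
/// register (splitting f64/v2f64 into GPR pairs and masking f16 values for
/// CMSE entry functions), then emit the appropriate return node, including
/// the interrupt-return form handled by LowerInterruptReturn above.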
3278SDValue
3279ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3280 bool isVarArg,
3282 const SmallVectorImpl<SDValue> &OutVals,
3283 const SDLoc &dl, SelectionDAG &DAG) const {
3284 // CCValAssign - represent the assignment of the return value to a location.
3285 SmallVector<CCValAssign, 16> RVLocs;
3286
3287 // CCState - Info about the registers and stack slots.
3288 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3289 *DAG.getContext());
3290
3291 // Analyze outgoing return values.
3292 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3293
3294 SDValue Glue;
3295 SmallVector<SDValue, 4> RetOps;
3296 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3297 bool isLittleEndian = Subtarget->isLittle();
3298
3299 MachineFunction &MF = DAG.getMachineFunction();
3300 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3301 AFI->setReturnRegsCount(RVLocs.size());
3302
3303 // Report error if cmse entry function returns structure through first ptr arg.
3304 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3305 // Note: using an empty SDLoc(), as the first line of the function is a
3306 // better place to report than the last line.
3309 "secure entry function would return value through pointer",
3310 SDLoc().getDebugLoc());
3311 DAG.getContext()->diagnose(Diag);
3312 }
3313
3314 // Copy the result values into the output registers.
3315 for (unsigned i = 0, realRVLocIdx = 0;
3316 i != RVLocs.size();
3317 ++i, ++realRVLocIdx) {
3318 CCValAssign &VA = RVLocs[i];
3319 assert(VA.isRegLoc() && "Can only return in registers!");
3320
3321 SDValue Arg = OutVals[realRVLocIdx];
3322 bool ReturnF16 = false;
3323
3324 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3325 // Half-precision return values can be returned like this:
3326 //
3327 // t11 f16 = fadd ...
3328 // t12: i16 = bitcast t11
3329 // t13: i32 = zero_extend t12
3330 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3331 //
3332 // to avoid code generation for bitcasts, we simply set Arg to the node
3333 // that produces the f16 value, t11 in this case.
3334 //
3335 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3336 SDValue ZE = Arg.getOperand(0);
3337 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3338 SDValue BC = ZE.getOperand(0);
3339 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3340 Arg = BC.getOperand(0);
3341 ReturnF16 = true;
3342 }
3343 }
3344 }
3345 }
3346
3347 switch (VA.getLocInfo()) {
3348 default: llvm_unreachable("Unknown loc info!");
3349 case CCValAssign::Full: break;
3350 case CCValAssign::BCvt:
3351 if (!ReturnF16)
3352 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3353 break;
3354 }
3355
3356 // Mask f16 arguments if this is a CMSE nonsecure entry.
3357 auto RetVT = Outs[realRVLocIdx].ArgVT;
3358 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3359 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3360 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3361 } else {
3362 auto LocBits = VA.getLocVT().getSizeInBits();
3363 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3364 SDValue Mask =
3365 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3366 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3367 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3368 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3369 }
3370 }
3371
3372 if (VA.needsCustom() &&
3373 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3374 if (VA.getLocVT() == MVT::v2f64) {
3375 // Extract the first half and return it in two registers.
3376 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3377 DAG.getConstant(0, dl, MVT::i32));
3378 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3379 DAG.getVTList(MVT::i32, MVT::i32), Half);
3380
3381 Chain =
3382 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3383 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3384 Glue = Chain.getValue(1);
3385 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3386 VA = RVLocs[++i]; // skip ahead to next loc
3387 Chain =
3388 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3389 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3390 Glue = Chain.getValue(1);
3391 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3392 VA = RVLocs[++i]; // skip ahead to next loc
3393
3394 // Extract the 2nd half and fall through to handle it as an f64 value.
3395 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3396 DAG.getConstant(1, dl, MVT::i32));
3397 }
3398 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3399 // available.
3400 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3401 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3402 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3403 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3404 Glue = Chain.getValue(1);
3405 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3406 VA = RVLocs[++i]; // skip ahead to next loc
3407 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3408 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3409 } else
3410 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3411
3412 // Guarantee that all emitted copies are
3413 // stuck together, avoiding something bad.
3414 Glue = Chain.getValue(1);
3415 RetOps.push_back(DAG.getRegister(
3416 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3417 }
3418 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3419 const MCPhysReg *I =
3420 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3421 if (I) {
3422 for (; *I; ++I) {
3423 if (ARM::GPRRegClass.contains(*I))
3424 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3425 else if (ARM::DPRRegClass.contains(*I))
3425 RetOps.push_back(
3426 DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3427 else
3428 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3429 }
3430 }
3431
3432 // Update chain and glue.
3433 RetOps[0] = Chain;
3434 if (Glue.getNode())
3435 RetOps.push_back(Glue);
3436
3437 // CPUs which aren't M-class use a special sequence to return from
3438 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3439 // though we use "subs pc, lr, #N").
3440 //
3441 // M-class CPUs actually use a normal return sequence with a special
3442 // (hardware-provided) value in LR, so the normal code path works.
3443 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3444 !Subtarget->isMClass()) {
3445 if (Subtarget->isThumb1Only())
3446 report_fatal_error("interrupt attribute is not supported in Thumb1");
3447 return LowerInterruptReturn(RetOps, dl, DAG);
3448 }
3449
3450 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3451 ARMISD::RET_GLUE;
3452 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3453}
3454
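/// isUsedByReturnOnly - Return true if the value produced by N feeds nothing
/// but a return, possibly through a CopyToReg, a VMOVRRD pair for f64, or a
/// bitcast for f32. Callers use this to fold the operation into the return,
/// e.g. when emitting a libcall as a tail call.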
3455bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3456 if (N->getNumValues() != 1)
3457 return false;
3458 if (!N->hasNUsesOfValue(1, 0))
3459 return false;
3460
3461 SDValue TCChain = Chain;
3462 SDNode *Copy = *N->user_begin();
3463 if (Copy->getOpcode() == ISD::CopyToReg) {
3464 // If the copy has a glue operand, we conservatively assume it isn't safe to
3465 // perform a tail call.
3466 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3467 return false;
3468 TCChain = Copy->getOperand(0);
3469 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3470 SDNode *VMov = Copy;
3471 // f64 returned in a pair of GPRs.
3472 SmallPtrSet<SDNode*, 2> Copies;
3473 for (SDNode *U : VMov->users()) {
3474 if (U->getOpcode() != ISD::CopyToReg)
3475 return false;
3476 Copies.insert(U);
3477 }
3478 if (Copies.size() > 2)
3479 return false;
3480
3481 for (SDNode *U : VMov->users()) {
3482 SDValue UseChain = U->getOperand(0);
3483 if (Copies.count(UseChain.getNode()))
3484 // Second CopyToReg
3485 Copy = U;
3486 else {
3487 // We are at the top of this chain.
3488 // If the copy has a glue operand, we conservatively assume it
3489 // isn't safe to perform a tail call.
3490 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3491 return false;
3492 // First CopyToReg
3493 TCChain = UseChain;
3494 }
3495 }
3496 } else if (Copy->getOpcode() == ISD::BITCAST) {
3497 // f32 returned in a single GPR.
3498 if (!Copy->hasOneUse())
3499 return false;
3500 Copy = *Copy->user_begin();
3501 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3502 return false;
3503 // If the copy has a glue operand, we conservatively assume it isn't safe to
3504 // perform a tail call.
3505 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3506 return false;
3507 TCChain = Copy->getOperand(0);
3508 } else {
3509 return false;
3510 }
3511
3512 bool HasRet = false;
3513 for (const SDNode *U : Copy->users()) {
3514 if (U->getOpcode() != ARMISD::RET_GLUE &&
3515 U->getOpcode() != ARMISD::INTRET_GLUE)
3516 return false;
3517 HasRet = true;
3518 }
3519
3520 if (!HasRet)
3521 return false;
3522
3523 Chain = TCChain;
3524 return true;
3525}
3526
3527bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3528 if (!Subtarget->supportsTailCall())
3529 return false;
3530
3531 if (!CI->isTailCall())
3532 return false;
3533
3534 return true;
3535}
3536
3537 // Trying to write a 64-bit value, so we need to split it into two 32-bit
3538 // values first, and pass the low and high parts through.
3539 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3540 SDLoc DL(Op);
3541 SDValue WriteValue = Op->getOperand(2);
3542
3543 // This function is only supposed to be called for i64 type argument.
3544 assert(WriteValue.getValueType() == MVT::i64
3545 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3546
3547 SDValue Lo, Hi;
3548 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3549 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3550 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3551}
3552
3553// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3554// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3555// one of the above mentioned nodes. It has to be wrapped because otherwise
3556// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3557// be used to form addressing mode. These wrapped nodes will be selected
3558// into MOVi.
3559SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3560 SelectionDAG &DAG) const {
3561 EVT PtrVT = Op.getValueType();
3562 // FIXME there is no actual debug info here
3563 SDLoc dl(Op);
3564 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3565 SDValue Res;
3566
3567 // When generating execute-only code Constant Pools must be promoted to the
3568 // global data section. It's a bit ugly that we can't share them across basic
3569 // blocks, but this way we guarantee that execute-only behaves correctly with
3570 // position-independent addressing modes.
3571 if (Subtarget->genExecuteOnly()) {
3572 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3573 auto T = const_cast<Type*>(CP->getType());
3574 auto C = const_cast<Constant*>(CP->getConstVal());
3575 auto M = const_cast<Module*>(DAG.getMachineFunction().
3576 getFunction().getParent());
3577 auto GV = new GlobalVariable(
3578 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3579 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3580 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3581 Twine(AFI->createPICLabelUId())
3582 );
3583 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3584 dl, PtrVT);
3585 return LowerGlobalAddress(GA, DAG);
3586 }
3587
3588 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3589 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3590 Align CPAlign = CP->getAlign();
3591 if (Subtarget->isThumb1Only())
3592 CPAlign = std::max(CPAlign, Align(4));
3593 if (CP->isMachineConstantPoolEntry())
3594 Res =
3595 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3596 else
3597 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3598 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3599}
3600
3601 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3602 // If we don't have a 32-bit pc-relative branch instruction then the jump
3603 // table consists of block addresses. Usually this is inline, but for
3604 // execute-only it must be placed out-of-line.
3605 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3606 return MachineJumpTableInfo::EK_BlockAddress;
3607 return MachineJumpTableInfo::EK_Inline;
3608}
3609
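/// LowerBlockAddress - Materialise a blockaddress either as a plain
/// constant-pool load or, for position-independent and ROPI code, as a
/// pc-relative constant-pool entry combined with ARMISD::PIC_ADD.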
3610SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3611 SelectionDAG &DAG) const {
3612 MachineFunction &MF = DAG.getMachineFunction();
3613 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3614 unsigned ARMPCLabelIndex = 0;
3615 SDLoc DL(Op);
3616 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3617 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3618 SDValue CPAddr;
3619 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3620 if (!IsPositionIndependent) {
3621 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3622 } else {
3623 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3624 ARMPCLabelIndex = AFI->createPICLabelUId();
3625 ARMConstantPoolValue *CPV =
3626 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3627 ARMCP::CPBlockAddress, PCAdj);
3628 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3629 }
3630 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3631 SDValue Result = DAG.getLoad(
3632 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3633 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3634 if (!IsPositionIndependent)
3635 return Result;
3636 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3637 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3638}
3639
3640/// Convert a TLS address reference into the correct sequence of loads
3641/// and calls to compute the variable's address for Darwin, and return an
3642/// SDValue containing the final node.
3643
3644/// Darwin only has one TLS scheme which must be capable of dealing with the
3645/// fully general situation, in the worst case. This means:
3646/// + "extern __thread" declaration.
3647/// + Defined in a possibly unknown dynamic library.
3648///
3649/// The general system is that each __thread variable has a [3 x i32] descriptor
3650/// which contains information used by the runtime to calculate the address. The
3651/// only part of this the compiler needs to know about is the first word, which
3652/// contains a function pointer that must be called with the address of the
3653/// entire descriptor in "r0".
3654///
3655/// Since this descriptor may be in a different unit, in general access must
3656/// proceed along the usual ARM rules. A common sequence to produce is:
3657///
3658/// movw rT1, :lower16:_var$non_lazy_ptr
3659/// movt rT1, :upper16:_var$non_lazy_ptr
3660/// ldr r0, [rT1]
3661/// ldr rT2, [r0]
3662/// blx rT2
3663/// [...address now in r0...]
3664SDValue
3665ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3666 SelectionDAG &DAG) const {
3667 assert(Subtarget->isTargetDarwin() &&
3668 "This function expects a Darwin target");
3669 SDLoc DL(Op);
3670
3671 // First step is to get the address of the actual global symbol. This is where
3672 // the TLS descriptor lives.
3673 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3674
3675 // The first entry in the descriptor is a function pointer that we must call
3676 // to obtain the address of the variable.
3677 SDValue Chain = DAG.getEntryNode();
3678 SDValue FuncTLVGet = DAG.getLoad(
3679 MVT::i32, DL, Chain, DescAddr,
3683 Chain = FuncTLVGet.getValue(1);
3684
3685 MachineFunction &F = DAG.getMachineFunction();
3686 MachineFrameInfo &MFI = F.getFrameInfo();
3687 MFI.setAdjustsStack(true);
3688
3689 // TLS calls preserve all registers except those that absolutely must be
3690 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3691 // silly).
3692 auto TRI =
3693 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3694 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3695 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3696
3697 // Finally, we can make the call. This is just a degenerate version of a
3698 // normal ARM call node: r0 takes the address of the descriptor, and
3699 // returns the address of the variable in this thread.
3700 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3701 Chain =
3702 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3703 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3704 DAG.getRegisterMask(Mask), Chain.getValue(1));
3705 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3706}
3707
3708SDValue
3709ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3710 SelectionDAG &DAG) const {
3711 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3712
3713 SDValue Chain = DAG.getEntryNode();
3714 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3715 SDLoc DL(Op);
3716
3717 // Load the current TEB (thread environment block)
3718 SDValue Ops[] = {Chain,
3719 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3720 DAG.getTargetConstant(15, DL, MVT::i32),
3721 DAG.getTargetConstant(0, DL, MVT::i32),
3722 DAG.getTargetConstant(13, DL, MVT::i32),
3723 DAG.getTargetConstant(0, DL, MVT::i32),
3724 DAG.getTargetConstant(2, DL, MVT::i32)};
3725 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3726 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3727
3728 SDValue TEB = CurrentTEB.getValue(0);
3729 Chain = CurrentTEB.getValue(1);
3730
3731 // Load the ThreadLocalStoragePointer from the TEB
3732 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3733 SDValue TLSArray =
3734 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3735 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3736
3737 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3738 // offset into the TLSArray.
3739
3740 // Load the TLS index from the C runtime
3741 SDValue TLSIndex =
3742 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3743 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3744 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3745
3746 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3747 DAG.getConstant(2, DL, MVT::i32));
3748 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3749 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3750 MachinePointerInfo());
3751
3752 // Get the offset of the start of the .tls section (section base)
3753 const auto *GA = cast<GlobalAddressSDNode>(Op);
3754 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3755 SDValue Offset = DAG.getLoad(
3756 PtrVT, DL, Chain,
3757 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3758 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3759 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3760
3761 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3762}
3763
3764// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3765SDValue
3766ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3767 SelectionDAG &DAG) const {
3768 SDLoc dl(GA);
3769 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3770 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3771 MachineFunction &MF = DAG.getMachineFunction();
3772 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3773 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3774 ARMConstantPoolValue *CPV =
3775 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3776 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3777 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3778 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3779 Argument = DAG.getLoad(
3780 PtrVT, dl, DAG.getEntryNode(), Argument,
3781 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3782 SDValue Chain = Argument.getValue(1);
3783
3784 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3785 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3786
3787 // call __tls_get_addr.
3788 ArgListTy Args;
3789 ArgListEntry Entry;
3790 Entry.Node = Argument;
3791 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3792 Args.push_back(Entry);
3793
3794 // FIXME: is there useful debug info available here?
3795 TargetLowering::CallLoweringInfo CLI(DAG);
3796 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3797 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3798 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3799
3800 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3801 return CallResult.first;
3802}
3803
3804// Lower ISD::GlobalTLSAddress using the "initial exec" or
3805// "local exec" model.
3806SDValue
3807ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3808 SelectionDAG &DAG,
3809 TLSModel::Model model) const {
3810 const GlobalValue *GV = GA->getGlobal();
3811 SDLoc dl(GA);
3813 SDValue Chain = DAG.getEntryNode();
3814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3815 // Get the Thread Pointer
3817
3818 if (model == TLSModel::InitialExec) {
3821 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3822 // Initial exec model.
3823 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3825 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3827 true);
3828 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3829 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3830 Offset = DAG.getLoad(
3831 PtrVT, dl, Chain, Offset,
3833 Chain = Offset.getValue(1);
3834
3835 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3836 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3837
3838 Offset = DAG.getLoad(
3839 PtrVT, dl, Chain, Offset,
3841 } else {
3842 // local exec model
3843 assert(model == TLSModel::LocalExec);
3846 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3847 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3848 Offset = DAG.getLoad(
3849 PtrVT, dl, Chain, Offset,
3851 }
3852
3853 // The address of the thread-local variable is the thread pointer plus the
3854 // offset of the variable.
3855 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3856}
3857
3858SDValue
3859ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3860 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3861 if (DAG.getTarget().useEmulatedTLS())
3862 return LowerToTLSEmulatedModel(GA, DAG);
3863
3864 if (Subtarget->isTargetDarwin())
3865 return LowerGlobalTLSAddressDarwin(Op, DAG);
3866
3867 if (Subtarget->isTargetWindows())
3868 return LowerGlobalTLSAddressWindows(Op, DAG);
3869
3870 // TODO: implement the "local dynamic" model
3871 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3873
3874 switch (model) {
3877 return LowerToTLSGeneralDynamicModel(GA, DAG);
3880 return LowerToTLSExecModels(GA, DAG, model);
3881 }
3882 llvm_unreachable("bogus TLS model");
3883}
3884
3885/// Return true if all users of V are within function F, looking through
3886/// ConstantExprs.
3887static bool allUsersAreInFunction(const Value *V, const Function *F) {
3888 SmallVector<const User*,4> Worklist(V->users());
3889 while (!Worklist.empty()) {
3890 auto *U = Worklist.pop_back_val();
3891 if (isa<ConstantExpr>(U)) {
3892 append_range(Worklist, U->users());
3893 continue;
3894 }
3895
3896 auto *I = dyn_cast<Instruction>(U);
3897 if (!I || I->getParent()->getParent() != F)
3898 return false;
3899 }
3900 return true;
3901}
3902
3904 const GlobalValue *GV, SelectionDAG &DAG,
3905 EVT PtrVT, const SDLoc &dl) {
3906 // If we're creating a pool entry for a constant global with unnamed address,
3907 // and the global is small enough, we can emit it inline into the constant pool
3908 // to save ourselves an indirection.
3909 //
3910 // This is a win if the constant is only used in one function (so it doesn't
3911 // need to be duplicated) or duplicating the constant wouldn't increase code
3912 // size (implying the constant is no larger than 4 bytes).
3913 const Function &F = DAG.getMachineFunction().getFunction();
3914
3915 // We rely on this decision to inline being idempotent and unrelated to the
3916 // use-site. We know that if we inline a variable at one use site, we'll
3917 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3918 // doesn't know about this optimization, so bail out if it's enabled;
3919 // otherwise we could decide to inline here (and thus never emit the GV)
3920 // while fast-isel generated code still requires the GV.
3923 return SDValue();
3924
3925 auto *GVar = dyn_cast<GlobalVariable>(GV);
3926 if (!GVar || !GVar->hasInitializer() ||
3927 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3928 !GVar->hasLocalLinkage())
3929 return SDValue();
3930
3931 // If we inline a value that contains relocations, we move the relocations
3932 // from .data to .text. This is not allowed in position-independent code.
3933 auto *Init = GVar->getInitializer();
3934 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3935 Init->needsDynamicRelocation())
3936 return SDValue();
3937
3938 // The constant islands pass can only really deal with alignment requests
3939 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3940 // any type with alignment requirements greater than 4 bytes. We also can
3941 // only promote constants that are multiples of 4 bytes in size or
3942 // are paddable to a multiple of 4. Currently we only try to pad constants
3943 // that are strings, for simplicity.
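// For example, a 6-byte string initializer gets RequiredPadding == 2 and is
// emitted with two trailing zero bytes so the 8-byte padded copy can be placed
// in the constant pool.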
3944 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3945 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3946 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3947 unsigned RequiredPadding = 4 - (Size % 4);
3948 bool PaddingPossible =
3949 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3950 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3951 Size == 0)
3952 return SDValue();
3953
3954 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3957
3958 // We can't bloat the constant pool too much, else the ConstantIslands pass
3959 // may fail to converge. If we haven't promoted this global yet (it may have
3960 // multiple uses), and promoting it would increase the constant pool size
3961 // (Size > 4), ensure we have space to do so up to MaxTotal.
3962 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3963 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3965 return SDValue();
3966
3967 // This is only valid if all users are in a single function; we can't clone
3968 // the constant in general. The LLVM IR unnamed_addr allows merging
3969 // constants, but not cloning them.
3970 //
3971 // We could potentially allow cloning if we could prove all uses of the
3972 // constant in the current function don't care about the address, like
3973 // printf format strings. But that isn't implemented for now.
3974 if (!allUsersAreInFunction(GVar, &F))
3975 return SDValue();
3976
3977 // We're going to inline this global. Pad it out if needed.
3978 if (RequiredPadding != 4) {
3979 StringRef S = CDAInit->getAsString();
3980
3982 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3983 while (RequiredPadding--)
3984 V.push_back(0);
3986 }
3987
3988 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3989 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3990 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3993 PaddedSize - 4);
3994 }
3995 ++NumConstpoolPromoted;
3996 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3997}
3998
4000 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
4001 if (!(GV = GA->getAliaseeObject()))
4002 return false;
4003 if (const auto *V = dyn_cast<GlobalVariable>(GV))
4004 return V->isConstant();
4005 return isa<Function>(GV);
4006}
4007
4008SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
4009 SelectionDAG &DAG) const {
4010 switch (Subtarget->getTargetTriple().getObjectFormat()) {
4011 default: llvm_unreachable("unknown object format");
4012 case Triple::COFF:
4013 return LowerGlobalAddressWindows(Op, DAG);
4014 case Triple::ELF:
4015 return LowerGlobalAddressELF(Op, DAG);
4016 case Triple::MachO:
4017 return LowerGlobalAddressDarwin(Op, DAG);
4018 }
4019}
4020
4021SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
4022 SelectionDAG &DAG) const {
4023 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4024 SDLoc dl(Op);
4025 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4026 bool IsRO = isReadOnly(GV);
4027
4028 // promoteToConstantPool only if not generating XO text section
4029 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
4030 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
4031 return V;
4032
4033 if (isPositionIndependent()) {
4035 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
4036 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
4037 if (!GV->isDSOLocal())
4038 Result =
4039 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4041 return Result;
4042 } else if (Subtarget->isROPI() && IsRO) {
4043 // PC-relative.
4044 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
4045 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
4046 return Result;
4047 } else if (Subtarget->isRWPI() && !IsRO) {
4048 // SB-relative.
4049 SDValue RelAddr;
4050 if (Subtarget->useMovt()) {
4051 ++NumMovwMovt;
4052 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
4053 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
4054 } else { // use literal pool for address constant
4057 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4058 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4059 RelAddr = DAG.getLoad(
4060 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4062 }
4063 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
4064 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
4065 return Result;
4066 }
4067
4068 // If we have T2 ops, we can materialize the address directly via a movt/movw
4069 // pair. This is always cheaper. If we need to generate execute-only code and
4070 // only have Thumb1 available, we can't use a constant pool and are forced to
4071 // use immediate relocations.
4072 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
4073 if (Subtarget->useMovt())
4074 ++NumMovwMovt;
4075 // FIXME: Once remat is capable of dealing with instructions with register
4076 // operands, expand this into two nodes.
4077 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
4078 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
4079 } else {
4080 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
4081 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4082 return DAG.getLoad(
4083 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4085 }
4086}
4087
4088SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
4089 SelectionDAG &DAG) const {
4090 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4091 "ROPI/RWPI not currently supported for Darwin");
4092 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4093 SDLoc dl(Op);
4094 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4095
4096 if (Subtarget->useMovt())
4097 ++NumMovwMovt;
4098
4099 // FIXME: Once remat is capable of dealing with instructions with register
4100 // operands, expand this into multiple nodes
4101 unsigned Wrapper =
4103
4104 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4105 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4106
4107 if (Subtarget->isGVIndirectSymbol(GV))
4108 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4110 return Result;
4111}
4112
4113SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4114 SelectionDAG &DAG) const {
4115 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4116 assert(Subtarget->useMovt() &&
4117 "Windows on ARM expects to use movw/movt");
4118 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4119 "ROPI/RWPI not currently supported for Windows");
4120
4122 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4123 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4124 if (GV->hasDLLImportStorageClass())
4125 TargetFlags = ARMII::MO_DLLIMPORT;
4126 else if (!TM.shouldAssumeDSOLocal(GV))
4127 TargetFlags = ARMII::MO_COFFSTUB;
4128 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4130 SDLoc DL(Op);
4131
4132 ++NumMovwMovt;
4133
4134 // FIXME: Once remat is capable of dealing with instructions with register
4135 // operands, expand this into two nodes.
4136 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4137 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4138 TargetFlags));
4139 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4140 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4142 return Result;
4143}
4144
4145SDValue
4146ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4147 SDLoc dl(Op);
4148 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4149 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4150 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4151 Op.getOperand(1), Val);
4152}
4153
4154SDValue
4155ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4156 SDLoc dl(Op);
4157 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4158 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4159}
4160
4161SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4162 SelectionDAG &DAG) const {
4163 SDLoc dl(Op);
4164 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4165 Op.getOperand(0));
4166}
4167
4168SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4169 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4170 unsigned IntNo =
4171 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4172 switch (IntNo) {
4173 default:
4174 return SDValue(); // Don't custom lower most intrinsics.
4175 case Intrinsic::arm_gnu_eabi_mcount: {
4177 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4178 SDLoc dl(Op);
4179 SDValue Chain = Op.getOperand(0);
4180 // call "\01__gnu_mcount_nc"
4181 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4182 const uint32_t *Mask =
4184 assert(Mask && "Missing call preserved mask for calling convention");
4185 // Mark LR as an implicit live-in.
4186 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4187 SDValue ReturnAddress =
4188 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4189 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4190 SDValue Callee =
4191 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4193 if (Subtarget->isThumb())
4194 return SDValue(
4195 DAG.getMachineNode(
4196 ARM::tBL_PUSHLR, dl, ResultTys,
4197 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4198 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4199 0);
4200 return SDValue(
4201 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4202 {ReturnAddress, Callee, RegisterMask, Chain}),
4203 0);
4204 }
4205 }
4206}
4207
4208SDValue
4209ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4210 const ARMSubtarget *Subtarget) const {
4211 unsigned IntNo = Op.getConstantOperandVal(0);
4212 SDLoc dl(Op);
4213 switch (IntNo) {
4214 default: return SDValue(); // Don't custom lower most intrinsics.
4215 case Intrinsic::thread_pointer: {
4216 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4217 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4218 }
4219 case Intrinsic::arm_cls: {
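// cls(x) counts the leading bits that match the sign bit, excluding the sign
// bit itself. The expansion below computes ctlz(((x ^ (x >> 31)) << 1) | 1):
// the xor turns leading sign bits into leading zeros, the shift drops the sign
// position from the count, and or-ing in 1 keeps the value non-zero.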
4220 const SDValue &Operand = Op.getOperand(1);
4221 const EVT VTy = Op.getValueType();
4222 SDValue SRA =
4223 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4224 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4225 SDValue SHL =
4226 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4227 SDValue OR =
4228 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4229 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4230 return Result;
4231 }
4232 case Intrinsic::arm_cls64: {
4233 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4234 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4235 const SDValue &Operand = Op.getOperand(1);
4236 const EVT VTy = Op.getValueType();
4237 SDValue Lo, Hi;
4238 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4239 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4240 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4241 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4242 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4243 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4244 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4245 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4246 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4247 SDValue CheckLo =
4248 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4249 SDValue HiIsZero =
4250 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4251 SDValue AdjustedLo =
4252 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4253 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4254 SDValue Result =
4255 DAG.getSelect(dl, VTy, CheckLo,
4256 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4257 return Result;
4258 }
4259 case Intrinsic::eh_sjlj_lsda: {
4262 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4263 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4264 SDValue CPAddr;
4265 bool IsPositionIndependent = isPositionIndependent();
4266 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4268 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4269 ARMCP::CPLSDA, PCAdj);
4270 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4271 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4272 SDValue Result = DAG.getLoad(
4273 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4275
4276 if (IsPositionIndependent) {
4277 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4278 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4279 }
4280 return Result;
4281 }
4282 case Intrinsic::arm_neon_vabs:
4283 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4284 Op.getOperand(1));
4285 case Intrinsic::arm_neon_vabds:
4286 if (Op.getValueType().isInteger())
4287 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4288 Op.getOperand(1), Op.getOperand(2));
4289 return SDValue();
4290 case Intrinsic::arm_neon_vabdu:
4291 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4292 Op.getOperand(1), Op.getOperand(2));
4293 case Intrinsic::arm_neon_vmulls:
4294 case Intrinsic::arm_neon_vmullu: {
4295 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4297 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4298 Op.getOperand(1), Op.getOperand(2));
4299 }
4300 case Intrinsic::arm_neon_vminnm:
4301 case Intrinsic::arm_neon_vmaxnm: {
4302 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4304 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4305 Op.getOperand(1), Op.getOperand(2));
4306 }
4307 case Intrinsic::arm_neon_vminu:
4308 case Intrinsic::arm_neon_vmaxu: {
4309 if (Op.getValueType().isFloatingPoint())
4310 return SDValue();
4311 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4312 ? ISD::UMIN : ISD::UMAX;
4313 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4314 Op.getOperand(1), Op.getOperand(2));
4315 }
4316 case Intrinsic::arm_neon_vmins:
4317 case Intrinsic::arm_neon_vmaxs: {
4318 // v{min,max}s is overloaded between signed integers and floats.
4319 if (!Op.getValueType().isFloatingPoint()) {
4320 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4321 ? ISD::SMIN : ISD::SMAX;
4322 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4323 Op.getOperand(1), Op.getOperand(2));
4324 }
4325 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4327 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4328 Op.getOperand(1), Op.getOperand(2));
4329 }
4330 case Intrinsic::arm_neon_vtbl1:
4331 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4332 Op.getOperand(1), Op.getOperand(2));
4333 case Intrinsic::arm_neon_vtbl2:
4334 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4335 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4336 case Intrinsic::arm_mve_pred_i2v:
4337 case Intrinsic::arm_mve_pred_v2i:
4338 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4339 Op.getOperand(1));
4340 case Intrinsic::arm_mve_vreinterpretq:
4341 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4342 Op.getOperand(1));
4343 case Intrinsic::arm_mve_lsll:
4344 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4345 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4346 case Intrinsic::arm_mve_asrl:
4347 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4348 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4349 }
4350}
4351
4353 const ARMSubtarget *Subtarget) {
4354 SDLoc dl(Op);
4355 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4356 if (SSID == SyncScope::SingleThread)
4357 return Op;
4358
4359 if (!Subtarget->hasDataBarrier()) {
4360 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4361 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4362 // here.
4363 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4364 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4365 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4366 DAG.getConstant(0, dl, MVT::i32));
4367 }
4368
4369 AtomicOrdering Ord =
4370 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4372 if (Subtarget->isMClass()) {
4373 // Only a full system barrier exists in the M-class architectures.
4375 } else if (Subtarget->preferISHSTBarriers() &&
4376 Ord == AtomicOrdering::Release) {
4377 // Swift happens to implement ISHST barriers in a way that's compatible with
4378 // Release semantics but weaker than ISH so we'd be fools not to use
4379 // it. Beware: other processors probably don't!
4381 }
4382
4383 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4384 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4385 DAG.getConstant(Domain, dl, MVT::i32));
4386}
4387
4389 const ARMSubtarget *Subtarget) {
4390 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4391 if (!(Subtarget->isThumb2() ||
4392 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4393 // Just preserve the chain.
4394 return Op.getOperand(0);
4395
4396 SDLoc dl(Op);
4397 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4398 if (!isRead &&
4399 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4400 // ARMv7 with MP extension has PLDW.
4401 return Op.getOperand(0);
4402
4403 unsigned isData = Op.getConstantOperandVal(4);
4404 if (Subtarget->isThumb()) {
4405 // Invert the bits.
4406 isRead = ~isRead & 1;
4407 isData = ~isData & 1;
4408 }
4409
4410 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4411 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4412 DAG.getConstant(isData, dl, MVT::i32));
4413}
4414
4417 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4418
4419 // vastart just stores the address of the VarArgsFrameIndex slot into the
4420 // memory location argument.
4421 SDLoc dl(Op);
4423 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4424 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4425 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4426 MachinePointerInfo(SV));
4427}
4428
4429SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4430 CCValAssign &NextVA,
4431 SDValue &Root,
4432 SelectionDAG &DAG,
4433 const SDLoc &dl) const {
4436
4437 const TargetRegisterClass *RC;
4438 if (AFI->isThumb1OnlyFunction())
4439 RC = &ARM::tGPRRegClass;
4440 else
4441 RC = &ARM::GPRRegClass;
4442
4443 // Transform the arguments stored in physical registers into virtual ones.
4444 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4445 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4446
4447 SDValue ArgValue2;
4448 if (NextVA.isMemLoc()) {
4449 MachineFrameInfo &MFI = MF.getFrameInfo();
4450 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4451
4452 // Create load node to retrieve arguments from the stack.
4453 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4454 ArgValue2 = DAG.getLoad(
4455 MVT::i32, dl, Root, FIN,
4457 } else {
4458 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4459 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4460 }
4461 if (!Subtarget->isLittle())
4462 std::swap (ArgValue, ArgValue2);
4463 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4464}
4465
4466// The remaining GPRs hold either the beginning of variable-argument
4467// data, or the beginning of an aggregate passed by value (usually
4468// byval). Either way, we allocate stack slots adjacent to the data
4469// provided by our caller, and store the unallocated registers there.
4470// If this is a variadic function, the va_list pointer will begin with
4471// these values; otherwise, this reassembles a (byval) structure that
4472// was split between registers and memory.
4473 // Return: The frame index that the registers were stored into.
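// For example, if a byval aggregate was split between r2-r3 and the stack,
// RBegin is r2 and the fixed object is created at offset -8 relative to the
// incoming stack arguments; storing r2 and r3 there makes the register and
// stack portions of the aggregate contiguous in memory.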
4474int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4475 const SDLoc &dl, SDValue &Chain,
4476 const Value *OrigArg,
4477 unsigned InRegsParamRecordIdx,
4478 int ArgOffset, unsigned ArgSize) const {
4479 // Currently, two use-cases are possible:
4480 // Case #1. Non-var-args function, and we meet the first byval parameter.
4481 // Set up the first unallocated register as the first byval register;
4482 // consume all remaining registers
4483 // (these two actions are performed by the HandleByVal method).
4484 // Then, here, we initialize the stack frame with
4485 // "store-reg" instructions.
4486 // Case #2. Var-args function that doesn't contain byval parameters.
4487 // The same: consume all remaining unallocated registers and
4488 // initialize the stack frame.
4489
4491 MachineFrameInfo &MFI = MF.getFrameInfo();
4493 unsigned RBegin, REnd;
4494 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4495 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4496 } else {
4497 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4498 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4499 REnd = ARM::R4;
4500 }
4501
4502 if (REnd != RBegin)
4503 ArgOffset = -4 * (ARM::R4 - RBegin);
4504
4505 auto PtrVT = getPointerTy(DAG.getDataLayout());
4506 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4507 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4508
4510 const TargetRegisterClass *RC =
4511 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4512
4513 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4514 Register VReg = MF.addLiveIn(Reg, RC);
4515 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4516 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4517 MachinePointerInfo(OrigArg, 4 * i));
4518 MemOps.push_back(Store);
4519 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4520 }
4521
4522 if (!MemOps.empty())
4523 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4524 return FrameIndex;
4525}
4526
4527 // Set up the stack frame that the va_list pointer will start from.
4528void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4529 const SDLoc &dl, SDValue &Chain,
4530 unsigned ArgOffset,
4531 unsigned TotalArgRegsSaveSize,
4532 bool ForceMutable) const {
4535
4536 // Try to store any remaining integer argument regs
4537 // to their spots on the stack so that they may be loaded by dereferencing
4538 // the result of va_next.
4539 // If there are no regs to be stored, just point past the last argument
4540 // passed via the stack.
4541 int FrameIndex = StoreByValRegs(
4542 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4543 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4544 AFI->setVarArgsFrameIndex(FrameIndex);
4545}
4546
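// An f16/bf16 value passed in an f32 register lives in the low 16 bits of the
// 32-bit register, with the high bits left unspecified (any-extended);
// joinRegisterPartsIntoValue below performs the inverse conversion.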
4547bool ARMTargetLowering::splitValueIntoRegisterParts(
4548 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4549 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4550 EVT ValueVT = Val.getValueType();
4551 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4552 unsigned ValueBits = ValueVT.getSizeInBits();
4553 unsigned PartBits = PartVT.getSizeInBits();
4554 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4555 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4556 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4557 Parts[0] = Val;
4558 return true;
4559 }
4560 return false;
4561}
4562
4563SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4564 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4565 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4566 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4567 unsigned ValueBits = ValueVT.getSizeInBits();
4568 unsigned PartBits = PartVT.getSizeInBits();
4569 SDValue Val = Parts[0];
4570
4571 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4572 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4573 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4574 return Val;
4575 }
4576 return SDValue();
4577}
4578
4579SDValue ARMTargetLowering::LowerFormalArguments(
4580 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4581 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4582 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4584 MachineFrameInfo &MFI = MF.getFrameInfo();
4585
4587
4588 // Assign locations to all of the incoming arguments.
4590 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4591 *DAG.getContext());
4592 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4593
4595 unsigned CurArgIdx = 0;
4596
4597 // Initially ArgRegsSaveSize is zero.
4598 // Then we increase this value each time we meet a byval parameter.
4599 // We also increase this value in the case of a varargs function.
4600 AFI->setArgRegsSaveSize(0);
4601
4602 // Calculate the amount of stack space that we need to allocate to store
4603 // byval and variadic arguments that are passed in registers.
4604 // We need to know this before we allocate the first byval or variadic
4605 // argument, as they will be allocated a stack slot below the CFA (Canonical
4606 // Frame Address, the stack pointer at entry to the function).
4607 unsigned ArgRegBegin = ARM::R4;
4608 for (const CCValAssign &VA : ArgLocs) {
4609 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4610 break;
4611
4612 unsigned Index = VA.getValNo();
4613 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4614 if (!Flags.isByVal())
4615 continue;
4616
4617 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4618 unsigned RBegin, REnd;
4619 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4620 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4621
4622 CCInfo.nextInRegsParam();
4623 }
4624 CCInfo.rewindByValRegsInfo();
4625
4626 int lastInsIndex = -1;
4627 if (isVarArg && MFI.hasVAStart()) {
4628 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4629 if (RegIdx != std::size(GPRArgRegs))
4630 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4631 }
4632
4633 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4634 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4635 auto PtrVT = getPointerTy(DAG.getDataLayout());
4636
4637 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4638 CCValAssign &VA = ArgLocs[i];
4639 if (Ins[VA.getValNo()].isOrigArg()) {
4640 std::advance(CurOrigArg,
4641 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4642 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4643 }
4644 // Arguments stored in registers.
4645 if (VA.isRegLoc()) {
4646 EVT RegVT = VA.getLocVT();
4647 SDValue ArgValue;
4648
4649 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4650 // f64 and vector types are split up into multiple registers or
4651 // combinations of registers and stack slots.
4652 SDValue ArgValue1 =
4653 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4654 VA = ArgLocs[++i]; // skip ahead to next loc
4655 SDValue ArgValue2;
4656 if (VA.isMemLoc()) {
4657 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4658 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4659 ArgValue2 = DAG.getLoad(
4660 MVT::f64, dl, Chain, FIN,
4662 } else {
4663 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4664 }
4665 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4666 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4667 ArgValue1, DAG.getIntPtrConstant(0, dl));
4668 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4669 ArgValue2, DAG.getIntPtrConstant(1, dl));
4670 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4671 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4672 } else {
4673 const TargetRegisterClass *RC;
4674
4675 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4676 RC = &ARM::HPRRegClass;
4677 else if (RegVT == MVT::f32)
4678 RC = &ARM::SPRRegClass;
4679 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4680 RegVT == MVT::v4bf16)
4681 RC = &ARM::DPRRegClass;
4682 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4683 RegVT == MVT::v8bf16)
4684 RC = &ARM::QPRRegClass;
4685 else if (RegVT == MVT::i32)
4686 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4687 : &ARM::GPRRegClass;
4688 else
4689 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4690
4691 // Transform the arguments in physical registers into virtual ones.
4692 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4693 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4694
4695 // If this value is passed in r0 and has the returned attribute (e.g.
4696 // C++ 'structors), record this fact for later use.
4697 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4698 AFI->setPreservesR0();
4699 }
4700 }
4701
4702 // If this is an 8 or 16-bit value, it is really passed promoted
4703 // to 32 bits. Insert an assert[sz]ext to capture this, then
4704 // truncate to the right size.
4705 switch (VA.getLocInfo()) {
4706 default: llvm_unreachable("Unknown loc info!");
4707 case CCValAssign::Full: break;
4708 case CCValAssign::BCvt:
4709 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4710 break;
4711 }
4712
4713 // f16 arguments have their size extended to 4 bytes and are passed as if
4714 // they had been copied to the LSBs of a 32-bit register.
4715 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
4716 if (VA.needsCustom() &&
4717 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4718 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4719
4720 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4721 // less than 32 bits must be sign- or zero-extended in the callee for
4722 // security reasons. Although the ABI mandates an extension done by the
4723 // caller, the latter cannot be trusted to follow the rules of the ABI.
4724 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4725 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4726 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4727 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4728
4729 InVals.push_back(ArgValue);
4730 } else { // VA.isRegLoc()
4731 // Only arguments passed on the stack should make it here.
4732 assert(VA.isMemLoc());
4733 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4734
4735 int index = VA.getValNo();
4736
4737 // Some Ins[] entries become multiple ArgLoc[] entries.
4738 // Process them only once.
4739 if (index != lastInsIndex)
4740 {
4741 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4742 // FIXME: For now, all byval parameter objects are marked mutable.
4743 // This can be changed with more analysis.
4744 // In case of tail call optimization, mark all arguments mutable, since
4745 // they could be overwritten by the lowering of arguments in case of
4746 // a tail call.
4747 if (Flags.isByVal()) {
4748 assert(Ins[index].isOrigArg() &&
4749 "Byval arguments cannot be implicit");
4750 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4751
4752 int FrameIndex = StoreByValRegs(
4753 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4754 VA.getLocMemOffset(), Flags.getByValSize());
4755 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4756 CCInfo.nextInRegsParam();
4757 } else {
4758 unsigned FIOffset = VA.getLocMemOffset();
4759 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4760 FIOffset, true);
4761
4762 // Create load nodes to retrieve arguments from the stack.
4763 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4764 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4766 DAG.getMachineFunction(), FI)));
4767 }
4768 lastInsIndex = index;
4769 }
4770 }
4771 }
4772
4773 // varargs
4774 if (isVarArg && MFI.hasVAStart()) {
4775 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4776 TotalArgRegsSaveSize);
4777 if (AFI->isCmseNSEntryFunction()) {
4780 "secure entry function must not be variadic", dl.getDebugLoc());
4781 DAG.getContext()->diagnose(Diag);
4782 }
4783 }
4784
4785 unsigned StackArgSize = CCInfo.getStackSize();
4786 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4787 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4788 // The only way to guarantee a tail call is if the callee restores its
4789 // argument area, but it must also keep the stack aligned when doing so.
4790 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
4791 assert(StackAlign && "data layout string is missing stack alignment");
4792 StackArgSize = alignTo(StackArgSize, *StackAlign);
4793
4794 AFI->setArgumentStackToRestore(StackArgSize);
4795 }
4796 AFI->setArgumentStackSize(StackArgSize);
4797
4798 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4801 "secure entry function requires arguments on stack", dl.getDebugLoc());
4802 DAG.getContext()->diagnose(Diag);
4803 }
4804
4805 return Chain;
4806}
4807
4808/// isFloatingPointZero - Return true if this is +0.0.
4810 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4811 return CFP->getValueAPF().isPosZero();
4812 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4813 // Maybe this has already been legalized into the constant pool?
4814 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4815 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4816 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4817 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4818 return CFP->getValueAPF().isPosZero();
4819 }
4820 } else if (Op->getOpcode() == ISD::BITCAST &&
4821 Op->getValueType(0) == MVT::f64) {
4822 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4823 // created by LowerConstantFP().
4824 SDValue BitcastOp = Op->getOperand(0);
4825 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4826 isNullConstant(BitcastOp->getOperand(0)))
4827 return true;
4828 }
4829 return false;
4830}
4831
4832 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
4833 /// for the given operands.
4834SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4835 SDValue &ARMcc, SelectionDAG &DAG,
4836 const SDLoc &dl) const {
4837 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4838 unsigned C = RHSC->getZExtValue();
4839 if (!isLegalICmpImmediate((int32_t)C)) {
4840 // Constant does not fit, try adjusting it by one.
4841 switch (CC) {
4842 default: break;
4843 case ISD::SETLT:
4844 case ISD::SETGE:
4845 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4847 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4848 }
4849 break;
4850 case ISD::SETULT:
4851 case ISD::SETUGE:
4852 if (C != 0 && isLegalICmpImmediate(C-1)) {
4854 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4855 }
4856 break;
4857 case ISD::SETLE:
4858 case ISD::SETGT:
4859 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4861 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4862 }
4863 break;
4864 case ISD::SETULE:
4865 case ISD::SETUGT:
4866 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4868 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4869 }
4870 break;
4871 }
4872 }
4873 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4875 // In ARM and Thumb-2, the compare instructions can shift their second
4876 // operand.
4878 std::swap(LHS, RHS);
4879 }
4880
4881 // Thumb1 has very limited immediate modes, so turning an "and" into a
4882 // shift can save multiple instructions.
4883 //
4884 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4885 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4886 // own. If it's the operand to an unsigned comparison with an immediate,
4887 // we can eliminate one of the shifts: we transform
4888 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4889 //
4890 // We avoid transforming cases which aren't profitable due to encoding
4891 // details:
4892 //
4893 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4894 // would not; in that case, we're essentially trading one immediate load for
4895 // another.
4896 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4897 // 3. C2 is zero; we have other code for this special case.
4898 //
4899 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4900 // instruction, since the AND is always one instruction anyway, but we could
4901 // use narrow instructions in some cases.
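// For example, with Mask = 0x3ff and RHSV = 0x120 (so ShiftBits = 22),
// "(x & 0x3ff) == 0x120" becomes "(x << 22) == (0x120 << 22)", needing a
// single shift instead of the shift pair used to form the AND.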
4902 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4903 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4904 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4905 !isSignedIntSetCC(CC)) {
4906 unsigned Mask = LHS.getConstantOperandVal(1);
4907 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4908 uint64_t RHSV = RHSC->getZExtValue();
4909 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4910 unsigned ShiftBits = llvm::countl_zero(Mask);
4911 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4912 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4913 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4914 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4915 }
4916 }
4917 }
4918
4919 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4920 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4921 // way a cmp would.
4922 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4923 // some tweaks to the heuristics for the previous and->shift transform.
4924 // FIXME: Optimize cases where the LHS isn't a shift.
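// For example, "(x << 3) > 0x80000000u" becomes "lsls tmp, x, #4": the carry
// flag receives bit 31 of (x << 3), Z is set iff its remaining bits are zero,
// so the HI condition (C set and Z clear) matches the original compare.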
4925 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4926 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4927 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4928 LHS.getConstantOperandVal(1) < 31) {
4929 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4930 SDValue Shift =
4931 DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
4932 LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
4933 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4934 return Shift.getValue(1);
4935 }
4936
4938
4939 // If the RHS is a constant zero then the V (overflow) flag will never be
4940 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4941 // simpler for other passes (like the peephole optimiser) to deal with.
4942 if (isNullConstant(RHS)) {
4943 switch (CondCode) {
4944 default: break;
4945 case ARMCC::GE:
4947 break;
4948 case ARMCC::LT:
4950 break;
4951 }
4952 }
4953
4954 ARMISD::NodeType CompareType;
4955 switch (CondCode) {
4956 default:
4957 CompareType = ARMISD::CMP;
4958 break;
4959 case ARMCC::EQ:
4960 case ARMCC::NE:
4961 // Uses only Z Flag
4962 CompareType = ARMISD::CMPZ;
4963 break;
4964 }
4965 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4966 return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
4967}
4968
4969 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4970SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4971 SelectionDAG &DAG, const SDLoc &dl,
4972 bool Signaling) const {
4973 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4974 SDValue Flags;
4975 if (!isFloatingPointZero(RHS))
4976 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
4977 LHS, RHS);
4978 else
4979 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4980 FlagsVT, LHS);
4981 return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
4982}
4983
4984// This function returns three things: the arithmetic computation itself
4985// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4986// comparison and the condition code define the case in which the arithmetic
4987// computation *does not* overflow.
4988std::pair<SDValue, SDValue>
4989ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4990 SDValue &ARMcc) const {
4991 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4992
4993 SDValue Value, OverflowCmp;
4994 SDValue LHS = Op.getOperand(0);
4995 SDValue RHS = Op.getOperand(1);
4996 SDLoc dl(Op);
4997
4998 // FIXME: We are currently always generating CMPs because we don't support
4999 // generating CMN through the backend. This is not as good as the natural
5000 // CMP case because it causes a register dependency and cannot be folded
5001 // later.
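// For example, for SADDO we emit Value = ADD(LHS, RHS) followed by
// CMP Value, LHS; that subtraction recomputes RHS and sets the V flag exactly
// when the original addition overflowed, so ARMCC::VC means "no overflow".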
5002
5003 switch (Op.getOpcode()) {
5004 default:
5005 llvm_unreachable("Unknown overflow instruction!");
5006 case ISD::SADDO:
5007 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
5008 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
5009 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
5010 break;
5011 case ISD::UADDO:
5012 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
5013 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
5014 // We do not use it in the USUBO case as Value may not be used.
5015 Value = DAG.getNode(ARMISD::ADDC, dl,
5016 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
5017 .getValue(0);
5018 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
5019 break;
5020 case ISD::SSUBO:
5021 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
5022 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
5023 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
5024 break;
5025 case ISD::USUBO:
5026 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
5027 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
5028 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
5029 break;
5030 case ISD::UMULO:
5031 // We generate a UMUL_LOHI and then check if the high word is 0.
5032 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
5033 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
5034 DAG.getVTList(Op.getValueType(), Op.getValueType()),
5035 LHS, RHS);
5036 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
5037 DAG.getConstant(0, dl, MVT::i32));
5038 Value = Value.getValue(0); // We only want the low 32 bits for the result.
5039 break;
5040 case ISD::SMULO:
5041 // We generate a SMUL_LOHI and then check if all the bits of the high word
5042 // are the same as the sign bit of the low word.
5043 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
5044 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
5045 DAG.getVTList(Op.getValueType(), Op.getValueType()),
5046 LHS, RHS);
5047 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
5048 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
5049 Value.getValue(0),
5050 DAG.getConstant(31, dl, MVT::i32)));
5051 Value = Value.getValue(0); // We only want the low 32 bits for the result.
5052 break;
5053 } // switch (...)
5054
5055 return std::make_pair(Value, OverflowCmp);
5056}
5057
5058SDValue
5059ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
5060 // Let legalize expand this if it isn't a legal type yet.
5061 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5062 return SDValue();
5063
5064 SDValue Value, OverflowCmp;
5065 SDValue ARMcc;
5066 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
5067 SDLoc dl(Op);
5068 // We use 0 and 1 as false and true values.
5069 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
5070 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
5071 EVT VT = Op.getValueType();
5072
5073 SDValue Overflow =
5074 DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
5075
5076 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
5077 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5078}
5079
5081 SelectionDAG &DAG) {
5082 SDLoc DL(BoolCarry);
5083 EVT CarryVT = BoolCarry.getValueType();
5084
5085 // This converts the boolean value carry into the carry flag by doing
5086 // ARMISD::SUBC Carry, 1
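// Carry - 1 leaves the ARM carry flag set (no borrow) when Carry was 1 and
// clear (borrow) when Carry was 0, so the flag mirrors the boolean value.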
5087 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5088 DAG.getVTList(CarryVT, MVT::i32),
5089 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5090 return Carry.getValue(1);
5091}
5092
5094 SelectionDAG &DAG) {
5095 SDLoc DL(Flags);
5096
5097 // Now convert the carry flag into a boolean carry. We do this
5098 // using ARMISD::ADDE 0, 0, Carry.
5099 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5100 DAG.getConstant(0, DL, MVT::i32),
5101 DAG.getConstant(0, DL, MVT::i32), Flags);
5102}
5103
5104SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5105 SelectionDAG &DAG) const {
5106 // Let legalize expand this if it isn't a legal type yet.
5107 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5108 return SDValue();
5109
5110 SDValue LHS = Op.getOperand(0);
5111 SDValue RHS = Op.getOperand(1);
5112 SDLoc dl(Op);
5113
5114 EVT VT = Op.getValueType();
5115 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5116 SDValue Value;
5117 SDValue Overflow;
5118 switch (Op.getOpcode()) {
5119 default:
5120 llvm_unreachable("Unknown overflow instruction!");
5121 case ISD::UADDO:
5122 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5123 // Convert the carry flag into a boolean value.
5124 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5125 break;
5126 case ISD::USUBO: {
5127 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5128 // Convert the carry flag into a boolean value.
5129 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5130 // ARMISD::SUBC returns a carry of 0 when we have to borrow, so compute
5131 // 1 - C to turn it into an overflow value.
5132 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5133 DAG.getConstant(1, dl, MVT::i32), Overflow);
5134 break;
5135 }
5136 }
5137
5138 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5139}
5140
5142 const ARMSubtarget *Subtarget) {
5143 EVT VT = Op.getValueType();
5144 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5145 return SDValue();
5146 if (!VT.isSimple())
5147 return SDValue();
5148
5149 unsigned NewOpcode;
5150 switch (VT.getSimpleVT().SimpleTy) {
5151 default:
5152 return SDValue();
5153 case MVT::i8:
5154 switch (Op->getOpcode()) {
5155 case ISD::UADDSAT:
5156 NewOpcode = ARMISD::UQADD8b;
5157 break;
5158 case ISD::SADDSAT:
5159 NewOpcode = ARMISD::QADD8b;
5160 break;
5161 case ISD::USUBSAT:
5162 NewOpcode = ARMISD::UQSUB8b;
5163 break;
5164 case ISD::SSUBSAT:
5165 NewOpcode = ARMISD::QSUB8b;
5166 break;
5167 }
5168 break;
5169 case MVT::i16:
5170 switch (Op->getOpcode()) {
5171 case ISD::UADDSAT:
5172 NewOpcode = ARMISD::UQADD16b;
5173 break;
5174 case ISD::SADDSAT:
5175 NewOpcode = ARMISD::QADD16b;
5176 break;
5177 case ISD::USUBSAT:
5178 NewOpcode = ARMISD::UQSUB16b;
5179 break;
5180 case ISD::SSUBSAT:
5181 NewOpcode = ARMISD::QSUB16b;
5182 break;
5183 }
5184 break;
5185 }
5186
5187 SDLoc dl(Op);
5188 SDValue Add =
5189 DAG.getNode(NewOpcode, dl, MVT::i32,
5190 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5191 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5192 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5193}
5194
5195SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5196 SDValue Cond = Op.getOperand(0);
5197 SDValue SelectTrue = Op.getOperand(1);
5198 SDValue SelectFalse = Op.getOperand(2);
5199 SDLoc dl(Op);
5200 unsigned Opc = Cond.getOpcode();
5201
5202 if (Cond.getResNo() == 1 &&
5203 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5204 Opc == ISD::USUBO)) {
5205 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5206 return SDValue();
5207
5208 SDValue Value, OverflowCmp;
5209 SDValue ARMcc;
5210 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5211 EVT VT = Op.getValueType();
5212
5213 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
5214 }
5215
5216 // Convert:
5217 //
5218 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5219 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5220 //
5221 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5222 const ConstantSDNode *CMOVTrue =
5223 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5224 const ConstantSDNode *CMOVFalse =
5225 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5226
5227 if (CMOVTrue && CMOVFalse) {
5228 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5229 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5230
5231 SDValue True;
5232 SDValue False;
5233 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5234 True = SelectTrue;
5235 False = SelectFalse;
5236 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5237 True = SelectFalse;
5238 False = SelectTrue;
5239 }
5240
5241 if (True.getNode() && False.getNode())
5242 return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
5243 Cond.getOperand(3), DAG);
5244 }
5245 }
5246
5247 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5248 // undefined bits before doing a full-word comparison with zero.
5249 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5250 DAG.getConstant(1, dl, Cond.getValueType()));
5251
5252 return DAG.getSelectCC(dl, Cond,
5253 DAG.getConstant(0, dl, Cond.getValueType()),
5254 SelectTrue, SelectFalse, ISD::SETNE);
5255}
5256
5258 bool &swpCmpOps, bool &swpVselOps) {
5259 // Start by selecting the GE condition code for opcodes that return true for
5260 // 'equality'
5261 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5262 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5263 CondCode = ARMCC::GE;
5264
5265 // and GT for opcodes that return false for 'equality'.
5266 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5267 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5268 CondCode = ARMCC::GT;
5269
5270 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5271 // to swap the compare operands.
5272 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5273 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5274 swpCmpOps = true;
5275
5276 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5277 // If we have an unordered opcode, we need to swap the operands to the VSEL
5278 // instruction (effectively negating the condition).
5279 //
5280 // This also has the effect of swapping which one of 'less' or 'greater'
5281 // returns true, so we also swap the compare operands. It also switches
5282 // whether we return true for 'equality', so we compensate by picking the
5283 // opposite condition code to our original choice.
5284 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5285 CC == ISD::SETUGT) {
5286 swpCmpOps = !swpCmpOps;
5287 swpVselOps = !swpVselOps;
5288 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5289 }
5290
5291 // 'ordered' is 'anything but unordered', so use the VS condition code and
5292 // swap the VSEL operands.
5293 if (CC == ISD::SETO) {
5294 CondCode = ARMCC::VS;
5295 swpVselOps = true;
5296 }
5297
5298 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5299 // code and swap the VSEL operands. Also do this if we don't care about the
5300 // unordered case.
5301 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5302 CondCode = ARMCC::EQ;
5303 swpVselOps = true;
5304 }
5305}
5306
5307SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5308 SDValue TrueVal, SDValue ARMcc,
5309 SDValue Flags, SelectionDAG &DAG) const {
5310 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5312 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5314 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5315
5316 SDValue TrueLow = TrueVal.getValue(0);
5317 SDValue TrueHigh = TrueVal.getValue(1);
5318 SDValue FalseLow = FalseVal.getValue(0);
5319 SDValue FalseHigh = FalseVal.getValue(1);
5320
5321 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5322 ARMcc, Flags);
5323 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5324 ARMcc, Flags);
5325
5326 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5327 }
5328 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
5329}
5330
5331 static bool isGTorGE(ISD::CondCode CC) {
5332 return CC == ISD::SETGT || CC == ISD::SETGE;
5333}
5334
5335 static bool isLTorLE(ISD::CondCode CC) {
5336 return CC == ISD::SETLT || CC == ISD::SETLE;
5337}
5338
5339// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5340// All of these conditions (and their <= and >= counterparts) will do:
5341// x < k ? k : x
5342// x > k ? x : k
5343// k < x ? x : k
5344// k > x ? k : x
5345static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5346 const SDValue TrueVal, const SDValue FalseVal,
5347 const ISD::CondCode CC, const SDValue K) {
5348 return (isGTorGE(CC) &&
5349 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5350 (isLTorLE(CC) &&
5351 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5352}
5353
5354// Check if two chained conditionals could be converted into SSAT or USAT.
5355//
5356// SSAT can replace a set of two conditional selectors that bound a number to an
5357// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5358//
5359// x < -k ? -k : (x > k ? k : x)
5360// x < -k ? -k : (x < k ? x : k)
5361// x > -k ? (x > k ? k : x) : -k
5362// x < k ? (x < -k ? -k : x) : k
5363// etc.
5364//
5365 // LLVM canonicalizes these to either a min(max()) or a max(min())
5366 // pattern. This function tries to match one of these and will return an SSAT
5367 // node if successful.
5368 //
5369 // USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
5370 // is a power of 2.
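// Illustrative example (added comment, not from the original source): with
// k = 127 the chain  x > 127 ? 127 : (x < -128 ? -128 : x)  clamps x to
// [-128, 127] and, since 127 + 1 is a power of 2, it can be emitted as a
// single SSAT #8.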
5371 static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5372 EVT VT = Op.getValueType();
5373 SDValue V1 = Op.getOperand(0);
5374 SDValue K1 = Op.getOperand(1);
5375 SDValue TrueVal1 = Op.getOperand(2);
5376 SDValue FalseVal1 = Op.getOperand(3);
5377 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5378
5379 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5380 if (Op2.getOpcode() != ISD::SELECT_CC)
5381 return SDValue();
5382
5383 SDValue V2 = Op2.getOperand(0);
5384 SDValue K2 = Op2.getOperand(1);
5385 SDValue TrueVal2 = Op2.getOperand(2);
5386 SDValue FalseVal2 = Op2.getOperand(3);
5387 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5388
5389 SDValue V1Tmp = V1;
5390 SDValue V2Tmp = V2;
5391
5392 // Check that the registers and the constants match a max(min()) or min(max())
5393 // pattern
5394 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5395 K2 != FalseVal2 ||
5396 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5397 return SDValue();
5398
5399 // Check that the constant in the lower-bound check is
5400 // the opposite of the constant in the upper-bound check
5401 // in 1's complement.
5402 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5403 return SDValue();
5404
5405 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5406 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5407 int64_t PosVal = std::max(Val1, Val2);
5408 int64_t NegVal = std::min(Val1, Val2);
5409
5410 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5411 !isPowerOf2_64(PosVal + 1))
5412 return SDValue();
5413
5414 // Handle the difference between USAT (unsigned) and SSAT (signed)
5415 // saturation
5416 // At this point, PosVal is guaranteed to be positive
5417 uint64_t K = PosVal;
5418 SDLoc dl(Op);
5419 if (Val1 == ~Val2)
5420 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5421 DAG.getConstant(llvm::countr_one(K), dl, VT));
5422 if (NegVal == 0)
5423 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5424 DAG.getConstant(llvm::countr_one(K), dl, VT));
5425
5426 return SDValue();
5427}
5428
5429// Check if a condition of the type x < k ? k : x can be converted into a
5430// bit operation instead of conditional moves.
5431// Currently this is allowed given:
5432// - The conditions and values match up
5433// - k is 0 or -1 (all ones)
5434 // This function will not check the last condition; that's up to the caller.
5435 // It returns true if the transformation can be made, and in that case
5436 // returns x in V and k in SatK.
5437 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5438 SDValue &SatK)
5439{
5440 SDValue LHS = Op.getOperand(0);
5441 SDValue RHS = Op.getOperand(1);
5442 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5443 SDValue TrueVal = Op.getOperand(2);
5444 SDValue FalseVal = Op.getOperand(3);
5445
5446 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5447 ? &RHS
5448 : nullptr;
5449
5450 // No constant operand in the comparison, early out
5451 if (!K)
5452 return false;
5453
5454 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5455 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5456 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5457
5458 // If the constant on the left and right side, or the variable on the left
5459 // and right, does not match, early out
5460 if (*K != KTmp || V != VTmp)
5461 return false;
5462
5463 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5464 SatK = *K;
5465 return true;
5466 }
5467
5468 return false;
5469}
5470
5471bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5472 if (VT == MVT::f32)
5473 return !Subtarget->hasVFP2Base();
5474 if (VT == MVT::f64)
5475 return !Subtarget->hasFP64();
5476 if (VT == MVT::f16)
5477 return !Subtarget->hasFullFP16();
5478 return false;
5479}
5480
5481SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5482 EVT VT = Op.getValueType();
5483 SDLoc dl(Op);
5484
5485 // Try to convert two saturating conditional selects into a single SSAT
5486 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5487 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5488 return SatValue;
5489
5490 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5491 // into more efficient bit operations, which is possible when k is 0 or -1.
5492 // On ARM and Thumb-2, which have a flexible second operand, this results in a
5493 // single instruction. On Thumb the shift and the bit operation will be two
5494 // instructions.
5495 // Only allow this transformation on full-width (32-bit) operations
5496 SDValue LowerSatConstant;
5497 SDValue SatValue;
5498 if (VT == MVT::i32 &&
5499 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5500 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5501 DAG.getConstant(31, dl, VT));
5502 if (isNullConstant(LowerSatConstant)) {
5503 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5504 DAG.getAllOnesConstant(dl, VT));
5505 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5506 } else if (isAllOnesConstant(LowerSatConstant))
5507 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5508 }
5509
5510 SDValue LHS = Op.getOperand(0);
5511 SDValue RHS = Op.getOperand(1);
5512 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5513 SDValue TrueVal = Op.getOperand(2);
5514 SDValue FalseVal = Op.getOperand(3);
5515 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5516 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5517
5518 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5519 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5520 unsigned TVal = CTVal->getZExtValue();
5521 unsigned FVal = CFVal->getZExtValue();
5522 unsigned Opcode = 0;
5523
5524 if (TVal == ~FVal) {
5525 Opcode = ARMISD::CSINV;
5526 } else if (TVal == ~FVal + 1) {
5527 Opcode = ARMISD::CSNEG;
5528 } else if (TVal + 1 == FVal) {
5529 Opcode = ARMISD::CSINC;
5530 } else if (TVal == FVal + 1) {
5531 Opcode = ARMISD::CSINC;
5532 std::swap(TrueVal, FalseVal);
5533 std::swap(TVal, FVal);
5534 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5535 }
5536
5537 if (Opcode) {
5538 // If one of the constants is cheaper than another, materialise the
5539 // cheaper one and let the csel generate the other.
5540 if (Opcode != ARMISD::CSINC &&
5541 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5542 std::swap(TrueVal, FalseVal);
5543 std::swap(TVal, FVal);
5544 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5545 }
5546
5547 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5548 // condition to get there. CSINC is not invertible like the other two
5549 // (~(~a) == a, -(-a) == a, but (a+1)+1 != a).
5550 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5551 std::swap(TrueVal, FalseVal);
5552 std::swap(TVal, FVal);
5553 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5554 }
5555
5556 // Drops F's value because we can get it by inverting/negating TVal.
5557 FalseVal = TrueVal;
5558
5559 SDValue ARMcc;
5560 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5561 EVT VT = TrueVal.getValueType();
5562 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5563 }
5564 }
5565
5566 if (isUnsupportedFloatingType(LHS.getValueType())) {
5567 DAG.getTargetLoweringInfo().softenSetCCOperands(
5568 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5569
5570 // If softenSetCCOperands only returned one value, we should compare it to
5571 // zero.
5572 if (!RHS.getNode()) {
5573 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5574 CC = ISD::SETNE;
5575 }
5576 }
5577
5578 if (LHS.getValueType() == MVT::i32) {
5579 // Try to generate VSEL on ARMv8.
5580 // The VSEL instruction can't use all the usual ARM condition
5581 // codes: it only has two bits to select the condition code, so it's
5582 // constrained to use only GE, GT, VS and EQ.
5583 //
5584 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5585 // swap the operands of the previous compare instruction (effectively
5586 // inverting the compare condition, swapping 'less' and 'greater') and
5587 // sometimes need to swap the operands to the VSEL (which inverts the
5588 // condition in the sense of firing whenever the previous condition didn't)
5589 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5590 TrueVal.getValueType() == MVT::f32 ||
5591 TrueVal.getValueType() == MVT::f64)) {
5592 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5593 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5594 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5595 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5596 std::swap(TrueVal, FalseVal);
5597 }
5598 }
5599
5600 SDValue ARMcc;
5601 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5602 // Choose GE over PL, which vsel does not support
5603 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5604 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5605 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5606 }
5607
5608 ARMCC::CondCodes CondCode, CondCode2;
5609 FPCCToARMCC(CC, CondCode, CondCode2);
5610
5611 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5612 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5613 // must use VSEL (limited condition codes), due to not having conditional f16
5614 // moves.
5615 if (Subtarget->hasFPARMv8Base() &&
5616 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5617 (TrueVal.getValueType() == MVT::f16 ||
5618 TrueVal.getValueType() == MVT::f32 ||
5619 TrueVal.getValueType() == MVT::f64)) {
5620 bool swpCmpOps = false;
5621 bool swpVselOps = false;
5622 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5623
5624 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5625 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5626 if (swpCmpOps)
5627 std::swap(LHS, RHS);
5628 if (swpVselOps)
5629 std::swap(TrueVal, FalseVal);
5630 }
5631 }
5632
5633 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5634 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5635 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5636 if (CondCode2 != ARMCC::AL) {
5637 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5638 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
5639 }
5640 return Result;
5641}
5642
5643/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5644/// to morph to an integer compare sequence.
5645static bool canChangeToInt(SDValue Op, bool &SeenZero,
5646 const ARMSubtarget *Subtarget) {
5647 SDNode *N = Op.getNode();
5648 if (!N->hasOneUse())
5649 // Otherwise it requires moving the value from fp to integer registers.
5650 return false;
5651 if (!N->getNumValues())
5652 return false;
5653 EVT VT = Op.getValueType();
5654 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5655 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5656 // vmrs are very slow, e.g. cortex-a8.
5657 return false;
5658
5659 if (isFloatingPointZero(Op)) {
5660 SeenZero = true;
5661 return true;
5662 }
5663 return ISD::isNormalLoad(N);
5664}
5665
5666 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5667 if (isFloatingPointZero(Op))
5668 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5669
5670 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5671 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5672 Ld->getPointerInfo(), Ld->getAlign(),
5673 Ld->getMemOperand()->getFlags());
5674
5675 llvm_unreachable("Unknown VFP cmp argument!");
5676}
5677
5678 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5679 SDValue &RetVal1, SDValue &RetVal2) {
5680 SDLoc dl(Op);
5681
5682 if (isFloatingPointZero(Op)) {
5683 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5684 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5685 return;
5686 }
5687
5688 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5689 SDValue Ptr = Ld->getBasePtr();
5690 RetVal1 =
5691 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5692 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5693
5694 EVT PtrType = Ptr.getValueType();
5695 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5696 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5697 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5698 Ld->getPointerInfo().getWithOffset(4),
5699 commonAlignment(Ld->getAlign(), 4),
5700 Ld->getMemOperand()->getFlags());
5701 return;
5702 }
5703
5704 llvm_unreachable("Unknown VFP cmp argument!");
5705}
5706
5707/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5708/// f32 and even f64 comparisons to integer ones.
5709SDValue
5710ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5711 SDValue Chain = Op.getOperand(0);
5712 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5713 SDValue LHS = Op.getOperand(2);
5714 SDValue RHS = Op.getOperand(3);
5715 SDValue Dest = Op.getOperand(4);
5716 SDLoc dl(Op);
5717
5718 bool LHSSeenZero = false;
5719 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5720 bool RHSSeenZero = false;
5721 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5722 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5723 // If unsafe fp math optimization is enabled and there are no other uses of
5724 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5725 // to an integer comparison.
5726 if (CC == ISD::SETOEQ)
5727 CC = ISD::SETEQ;
5728 else if (CC == ISD::SETUNE)
5729 CC = ISD::SETNE;
5730
5731 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5732 SDValue ARMcc;
5733 if (LHS.getValueType() == MVT::f32) {
5734 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5735 bitcastf32Toi32(LHS, DAG), Mask);
5736 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5737 bitcastf32Toi32(RHS, DAG), Mask);
5738 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5739 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5740 Cmp);
5741 }
5742
5743 SDValue LHS1, LHS2;
5744 SDValue RHS1, RHS2;
5745 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5746 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5747 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5748 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5749 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5750 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5751 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5752 return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
5753 }
5754
5755 return SDValue();
5756}
5757
5758SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5759 SDValue Chain = Op.getOperand(0);
5760 SDValue Cond = Op.getOperand(1);
5761 SDValue Dest = Op.getOperand(2);
5762 SDLoc dl(Op);
5763
5764 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5765 // instruction.
5766 unsigned Opc = Cond.getOpcode();
5767 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5768 !Subtarget->isThumb1Only();
5769 if (Cond.getResNo() == 1 &&
5770 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5771 Opc == ISD::USUBO || OptimizeMul)) {
5772 // Only lower legal XALUO ops.
5773 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5774 return SDValue();
5775
5776 // The actual operation with overflow check.
5777 SDValue Value, OverflowCmp;
5778 SDValue ARMcc;
5779 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5780
5781 // Reverse the condition code.
5782 ARMCC::CondCodes CondCode =
5783 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5784 CondCode = ARMCC::getOppositeCondition(CondCode);
5785 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5786
5787 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5788 OverflowCmp);
5789 }
5790
5791 return SDValue();
5792}
5793
5794SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5795 SDValue Chain = Op.getOperand(0);
5796 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5797 SDValue LHS = Op.getOperand(2);
5798 SDValue RHS = Op.getOperand(3);
5799 SDValue Dest = Op.getOperand(4);
5800 SDLoc dl(Op);
5801
5802 if (isUnsupportedFloatingType(LHS.getValueType())) {
5803 DAG.getTargetLoweringInfo().softenSetCCOperands(
5804 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5805
5806 // If softenSetCCOperands only returned one value, we should compare it to
5807 // zero.
5808 if (!RHS.getNode()) {
5809 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5810 CC = ISD::SETNE;
5811 }
5812 }
5813
5814 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5815 // instruction.
5816 unsigned Opc = LHS.getOpcode();
5817 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5818 !Subtarget->isThumb1Only();
5819 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5820 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5821 Opc == ISD::USUBO || OptimizeMul) &&
5822 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5823 // Only lower legal XALUO ops.
5824 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5825 return SDValue();
5826
5827 // The actual operation with overflow check.
5828 SDValue Value, OverflowCmp;
5829 SDValue ARMcc;
5830 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5831
5832 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5833 // Reverse the condition code.
5834 ARMCC::CondCodes CondCode =
5835 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5836 CondCode = ARMCC::getOppositeCondition(CondCode);
5837 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5838 }
5839
5840 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5841 OverflowCmp);
5842 }
5843
5844 if (LHS.getValueType() == MVT::i32) {
5845 SDValue ARMcc;
5846 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5847 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
5848 }
5849
5850 if (getTargetMachine().Options.UnsafeFPMath &&
5851 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5852 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5853 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5854 return Result;
5855 }
5856
5857 ARMCC::CondCodes CondCode, CondCode2;
5858 FPCCToARMCC(CC, CondCode, CondCode2);
5859
5860 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5861 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5862 SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
5863 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5864 if (CondCode2 != ARMCC::AL) {
5865 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5866 SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
5867 Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5868 }
5869 return Res;
5870}
5871
5872SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5873 SDValue Chain = Op.getOperand(0);
5874 SDValue Table = Op.getOperand(1);
5875 SDValue Index = Op.getOperand(2);
5876 SDLoc dl(Op);
5877
5878 EVT PTy = getPointerTy(DAG.getDataLayout());
5879 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5880 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5881 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5882 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5883 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5884 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5885 // Thumb2 and ARMv8-M use a two-level jump. That is, the branch jumps into the
5886 // jump table, which does another jump to the destination. This also makes it
5887 // easier to translate to TBB / TBH later (Thumb2 only).
5888 // FIXME: This might not work if the function is extremely large.
5889 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5890 Addr, Op.getOperand(2), JTI);
5891 }
5892 if (isPositionIndependent() || Subtarget->isROPI()) {
5893 Addr =
5894 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5895 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5896 Chain = Addr.getValue(1);
5897 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5898 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5899 } else {
5900 Addr =
5901 DAG.getLoad(PTy, dl, Chain, Addr,
5902 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5903 Chain = Addr.getValue(1);
5904 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5905 }
5906}
5907
5908 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5909 EVT VT = Op.getValueType();
5910 SDLoc dl(Op);
5911
5912 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5913 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5914 return Op;
5915 return DAG.UnrollVectorOp(Op.getNode());
5916 }
5917
5918 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5919
5920 EVT NewTy;
5921 const EVT OpTy = Op.getOperand(0).getValueType();
5922 if (OpTy == MVT::v4f32)
5923 NewTy = MVT::v4i32;
5924 else if (OpTy == MVT::v4f16 && HasFullFP16)
5925 NewTy = MVT::v4i16;
5926 else if (OpTy == MVT::v8f16 && HasFullFP16)
5927 NewTy = MVT::v8i16;
5928 else
5929 llvm_unreachable("Invalid type for custom lowering!");
5930
5931 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5932 return DAG.UnrollVectorOp(Op.getNode());
5933
5934 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5935 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5936}
5937
5938SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5939 EVT VT = Op.getValueType();
5940 if (VT.isVector())
5941 return LowerVectorFP_TO_INT(Op, DAG);
5942
5943 bool IsStrict = Op->isStrictFPOpcode();
5944 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5945
5946 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5947 RTLIB::Libcall LC;
5948 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5949 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5950 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5951 Op.getValueType());
5952 else
5953 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5954 Op.getValueType());
5955 SDLoc Loc(Op);
5956 MakeLibCallOptions CallOptions;
5957 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5958 SDValue Result;
5959 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5960 CallOptions, Loc, Chain);
5961 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5962 }
5963
5964 // FIXME: Remove this when we have strict fp instruction selection patterns
5965 if (IsStrict) {
5966 SDLoc Loc(Op);
5967 SDValue Result =
5968 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5969 : ISD::FP_TO_UINT,
5970 Loc, Op.getValueType(), SrcVal);
5971 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5972 }
5973
5974 return Op;
5975}
5976
5977 static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5978 const ARMSubtarget *Subtarget) {
5979 EVT VT = Op.getValueType();
5980 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5981 EVT FromVT = Op.getOperand(0).getValueType();
5982
5983 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5984 return Op;
5985 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5986 Subtarget->hasFP64())
5987 return Op;
5988 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5989 Subtarget->hasFullFP16())
5990 return Op;
5991 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5992 Subtarget->hasMVEFloatOps())
5993 return Op;
5994 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5995 Subtarget->hasMVEFloatOps())
5996 return Op;
5997
5998 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5999 return SDValue();
6000
6001 SDLoc DL(Op);
6002 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
6003 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
6004 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
6005 DAG.getValueType(VT.getScalarType()));
6006 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
6007 DAG.getConstant((1 << BW) - 1, DL, VT));
6008 if (IsSigned)
6009 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
6010 DAG.getSignedConstant(-(1 << BW), DL, VT));
6011 return Max;
6012}
6013
6014 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
6015 EVT VT = Op.getValueType();
6016 SDLoc dl(Op);
6017
6018 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
6019 if (VT.getVectorElementType() == MVT::f32)
6020 return Op;
6021 return DAG.UnrollVectorOp(Op.getNode());
6022 }
6023
6024 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
6025 Op.getOperand(0).getValueType() == MVT::v8i16) &&
6026 "Invalid type for custom lowering!");
6027
6028 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
6029
6030 EVT DestVecType;
6031 if (VT == MVT::v4f32)
6032 DestVecType = MVT::v4i32;
6033 else if (VT == MVT::v4f16 && HasFullFP16)
6034 DestVecType = MVT::v4i16;
6035 else if (VT == MVT::v8f16 && HasFullFP16)
6036 DestVecType = MVT::v8i16;
6037 else
6038 return DAG.UnrollVectorOp(Op.getNode());
6039
6040 unsigned CastOpc;
6041 unsigned Opc;
6042 switch (Op.getOpcode()) {
6043 default: llvm_unreachable("Invalid opcode!");
6044 case ISD::SINT_TO_FP:
6045 CastOpc = ISD::SIGN_EXTEND;
6046 Opc = ISD::SINT_TO_FP;
6047 break;
6048 case ISD::UINT_TO_FP:
6049 CastOpc = ISD::ZERO_EXTEND;
6050 Opc = ISD::UINT_TO_FP;
6051 break;
6052 }
6053
6054 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
6055 return DAG.getNode(Opc, dl, VT, Op);
6056}
6057
6058SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
6059 EVT VT = Op.getValueType();
6060 if (VT.isVector())
6061 return LowerVectorINT_TO_FP(Op, DAG);
6062 if (isUnsupportedFloatingType(VT)) {
6063 RTLIB::Libcall LC;
6064 if (Op.getOpcode() == ISD::SINT_TO_FP)
6065 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6066 Op.getValueType());
6067 else
6068 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6069 Op.getValueType());
6070 MakeLibCallOptions CallOptions;
6071 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6072 CallOptions, SDLoc(Op)).first;
6073 }
6074
6075 return Op;
6076}
6077
6078SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6079 // Implement fcopysign with a fabs and a conditional fneg.
6080 SDValue Tmp0 = Op.getOperand(0);
6081 SDValue Tmp1 = Op.getOperand(1);
6082 SDLoc dl(Op);
6083 EVT VT = Op.getValueType();
6084 EVT SrcVT = Tmp1.getValueType();
6085 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6086 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6087 bool UseNEON = !InGPR && Subtarget->hasNEON();
6088
6089 if (UseNEON) {
6090 // Use VBSL to copy the sign bit.
6091 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6092 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6093 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6094 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6095 if (VT == MVT::f64)
6096 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6097 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6098 DAG.getConstant(32, dl, MVT::i32));
6099 else /*if (VT == MVT::f32)*/
6100 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6101 if (SrcVT == MVT::f32) {
6102 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6103 if (VT == MVT::f64)
6104 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6105 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6106 DAG.getConstant(32, dl, MVT::i32));
6107 } else if (VT == MVT::f32)
6108 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6109 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6110 DAG.getConstant(32, dl, MVT::i32));
6111 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6112 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6113
6114 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6115 dl, MVT::i32);
6116 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6117 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6118 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6119
6120 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6121 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6122 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6123 if (VT == MVT::f32) {
6124 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6125 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6126 DAG.getConstant(0, dl, MVT::i32));
6127 } else {
6128 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6129 }
6130
6131 return Res;
6132 }
6133
6134 // Bitcast operand 1 to i32.
6135 if (SrcVT == MVT::f64)
6136 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6137 Tmp1).getValue(1);
6138 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6139
6140 // Or in the signbit with integer operations.
6141 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6142 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6143 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6144 if (VT == MVT::f32) {
6145 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6146 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6147 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6148 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6149 }
6150
6151 // f64: Or the high part with signbit and then combine two parts.
6152 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6153 Tmp0);
6154 SDValue Lo = Tmp0.getValue(0);
6155 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6156 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6157 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6158}
6159
6160SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6161 MachineFunction &MF = DAG.getMachineFunction();
6162 MachineFrameInfo &MFI = MF.getFrameInfo();
6163 MFI.setReturnAddressIsTaken(true);
6164
6165 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6166 return SDValue();
6167
6168 EVT VT = Op.getValueType();
6169 SDLoc dl(Op);
6170 unsigned Depth = Op.getConstantOperandVal(0);
6171 if (Depth) {
6172 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6173 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6174 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6175 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6176 MachinePointerInfo());
6177 }
6178
6179 // Return LR, which contains the return address. Mark it an implicit live-in.
6180 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6181 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6182}
6183
6184SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6185 const ARMBaseRegisterInfo &ARI =
6186 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6187 MachineFunction &MF = DAG.getMachineFunction();
6188 MachineFrameInfo &MFI = MF.getFrameInfo();
6189 MFI.setFrameAddressIsTaken(true);
6190
6191 EVT VT = Op.getValueType();
6192 SDLoc dl(Op); // FIXME probably not meaningful
6193 unsigned Depth = Op.getConstantOperandVal(0);
6194 Register FrameReg = ARI.getFrameRegister(MF);
6195 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6196 while (Depth--)
6197 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6198 MachinePointerInfo());
6199 return FrameAddr;
6200}
6201
6202// FIXME? Maybe this could be a TableGen attribute on some registers and
6203// this table could be generated automatically from RegInfo.
6204Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6205 const MachineFunction &MF) const {
6206 Register Reg = StringSwitch<unsigned>(RegName)
6207 .Case("sp", ARM::SP)
6208 .Default(0);
6209 if (Reg)
6210 return Reg;
6211 report_fatal_error(Twine("Invalid register name \""
6212 + StringRef(RegName) + "\"."));
6213}
6214
6215 // The result is a 64-bit value, so split it into two 32-bit values and
6216 // return them as a pair of values.
6217 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6218 SelectionDAG &DAG) {
6219 SDLoc DL(N);
6220
6221 // This function is only supposed to be called for i64 type destination.
6222 assert(N->getValueType(0) == MVT::i64
6223 && "ExpandREAD_REGISTER called for non-i64 type result.");
6224
6225 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6226 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6227 N->getOperand(0),
6228 N->getOperand(1));
6229
6230 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6231 Read.getValue(1)));
6232 Results.push_back(Read.getOperand(0));
6233}
6234
6235/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6236/// When \p DstVT, the destination type of \p BC, is on the vector
6237 /// register bank and the source of the bitcast, \p Op, operates on the same
6238 /// bank, it might be possible to combine them, such that everything stays on
6239 /// the vector register bank.
6240 /// \returns The node that would replace \p BC, if the combine
6241 /// is possible.
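/// For instance (illustrative, added comment):
/// v2f32 (bitcast (i64 extractelt v2i64 %src, 1)) can become
/// v2f32 (extract_subvector (v4f32 bitcast %src), 2), keeping the value on
/// the vector bank.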
6242 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6243 SelectionDAG &DAG) {
6244 SDValue Op = BC->getOperand(0);
6245 EVT DstVT = BC->getValueType(0);
6246
6247 // The only vector instruction that can produce a scalar (remember,
6248 // since the bitcast was about to be turned into VMOVDRR, the source
6249 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6250 // Moreover, we can do this combine only if there is one use.
6251 // Finally, if the destination type is not a vector, there is not
6252 // much point in forcing everything onto the vector bank.
6253 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6254 !Op.hasOneUse())
6255 return SDValue();
6256
6257 // If the index is not constant, we will introduce an additional
6258 // multiply that will stick.
6259 // Give up in that case.
6260 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6261 if (!Index)
6262 return SDValue();
6263 unsigned DstNumElt = DstVT.getVectorNumElements();
6264
6265 // Compute the new index.
6266 const APInt &APIntIndex = Index->getAPIntValue();
6267 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6268 NewIndex *= APIntIndex;
6269 // Check if the new constant index fits into i32.
6270 if (NewIndex.getBitWidth() > 32)
6271 return SDValue();
6272
6273 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6274 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6275 SDLoc dl(Op);
6276 SDValue ExtractSrc = Op.getOperand(0);
6277 EVT VecVT = EVT::getVectorVT(
6278 *DAG.getContext(), DstVT.getScalarType(),
6279 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6280 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6281 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6282 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6283}
6284
6285/// ExpandBITCAST - If the target supports VFP, this function is called to
6286/// expand a bit convert where either the source or destination type is i64 to
6287/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6288/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6289/// vectors), since the legalizer won't know what to do with that.
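/// For example (illustrative, added comment): an i64 -> f64 bitcast becomes
/// VMOVDRR(lo32, hi32), and an f64 -> i64 bitcast becomes a BUILD_PAIR of the
/// two VMOVRRD results.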
6290SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6291 const ARMSubtarget *Subtarget) const {
6292 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6293 SDLoc dl(N);
6294 SDValue Op = N->getOperand(0);
6295
6296 // This function is only supposed to be called for i16 and i64 types, either
6297 // as the source or destination of the bit convert.
6298 EVT SrcVT = Op.getValueType();
6299 EVT DstVT = N->getValueType(0);
6300
6301 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6302 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6303 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6304 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6305
6306 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6307 (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
6308 if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
6309 Op = DAG.getBitcast(MVT::f16, Op);
6310 return DAG.getNode(
6311 ISD::TRUNCATE, SDLoc(N), DstVT,
6312 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6313 }
6314
6315 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6316 return SDValue();
6317
6318 // Turn i64->f64 into VMOVDRR.
6319 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6320 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6321 // if we can combine the bitcast with its source.
6322 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6323 return Val;
6324 SDValue Lo, Hi;
6325 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6326 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6327 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6328 }
6329
6330 // Turn f64->i64 into VMOVRRD.
6331 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6332 SDValue Cvt;
6333 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6334 SrcVT.getVectorNumElements() > 1)
6335 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6336 DAG.getVTList(MVT::i32, MVT::i32),
6337 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6338 else
6339 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6340 DAG.getVTList(MVT::i32, MVT::i32), Op);
6341 // Merge the pieces into a single i64 value.
6342 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6343 }
6344
6345 return SDValue();
6346}
6347
6348/// getZeroVector - Returns a vector of specified type with all zero elements.
6349/// Zero vectors are used to represent vector negation and in those cases
6350/// will be implemented with the NEON VNEG instruction. However, VNEG does
6351/// not support i64 elements, so sometimes the zero vectors will need to be
6352/// explicitly constructed. Regardless, use a canonical VMOV to create the
6353/// zero vector.
6354static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6355 assert(VT.isVector() && "Expected a vector type");
6356 // The canonical modified immediate encoding of a zero vector is....0!
6357 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6358 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6359 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6360 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6361}
6362
6363 /// LowerShiftRightParts - Lower SRA_PARTS / SRL_PARTS, which return two
6364 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
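/// A minimal sketch of the idea (illustrative, added comment): for {lo, hi}
/// shifted right by s < 32, lo' = (lo >> s) | (hi << (32 - s)) and
/// hi' = hi >> s (arithmetic for SRA_PARTS); for s >= 32 the "big shift"
/// results are selected via CMOV instead.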
6365SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6366 SelectionDAG &DAG) const {
6367 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6368 EVT VT = Op.getValueType();
6369 unsigned VTBits = VT.getSizeInBits();
6370 SDLoc dl(Op);
6371 SDValue ShOpLo = Op.getOperand(0);
6372 SDValue ShOpHi = Op.getOperand(1);
6373 SDValue ShAmt = Op.getOperand(2);
6374 SDValue ARMcc;
6375 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6376
6377 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6378
6379 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6380 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6381 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6382 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6383 DAG.getConstant(VTBits, dl, MVT::i32));
6384 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6385 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6386 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6387 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6388 ISD::SETGE, ARMcc, DAG, dl);
6389 SDValue Lo =
6390 DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
6391
6392 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6393 SDValue HiBigShift = Opc == ISD::SRA
6394 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6395 DAG.getConstant(VTBits - 1, dl, VT))
6396 : DAG.getConstant(0, dl, VT);
6397 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6398 ISD::SETGE, ARMcc, DAG, dl);
6399 SDValue Hi =
6400 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6401
6402 SDValue Ops[2] = { Lo, Hi };
6403 return DAG.getMergeValues(Ops, dl);
6404}
6405
6406 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6407 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6408SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6409 SelectionDAG &DAG) const {
6410 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6411 EVT VT = Op.getValueType();
6412 unsigned VTBits = VT.getSizeInBits();
6413 SDLoc dl(Op);
6414 SDValue ShOpLo = Op.getOperand(0);
6415 SDValue ShOpHi = Op.getOperand(1);
6416 SDValue ShAmt = Op.getOperand(2);
6417 SDValue ARMcc;
6418
6419 assert(Op.getOpcode() == ISD::SHL_PARTS);
6420 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6421 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6422 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6423 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6424 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6425
6426 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6427 DAG.getConstant(VTBits, dl, MVT::i32));
6428 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6429 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6430 ISD::SETGE, ARMcc, DAG, dl);
6431 SDValue Hi =
6432 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6433
6434 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6435 ISD::SETGE, ARMcc, DAG, dl);
6436 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6437 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6438 DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
6439
6440 SDValue Ops[2] = { Lo, Hi };
6441 return DAG.getMergeValues(Ops, dl);
6442}
6443
6444SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6445 SelectionDAG &DAG) const {
6446 // The rounding mode is in bits 23:22 of the FPSCR.
6447 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6448 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
6449 // so that the shift + and get folded into a bitfield extract.
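// Worked example (illustrative, added comment): FPSCR[23:22] == 0b01 (round
// towards plus infinity) gives (((1 << 22) + (1 << 22)) >> 22) & 3 == 2, the
// FLT_ROUNDS encoding for round-to-+infinity.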
6450 SDLoc dl(Op);
6451 SDValue Chain = Op.getOperand(0);
6452 SDValue Ops[] = {Chain,
6453 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6454
6455 SDValue FPSCR =
6456 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6457 Chain = FPSCR.getValue(1);
6458 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6459 DAG.getConstant(1U << 22, dl, MVT::i32));
6460 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6461 DAG.getConstant(22, dl, MVT::i32));
6462 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6463 DAG.getConstant(3, dl, MVT::i32));
6464 return DAG.getMergeValues({And, Chain}, dl);
6465}
6466
6467SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6468 SelectionDAG &DAG) const {
6469 SDLoc DL(Op);
6470 SDValue Chain = Op->getOperand(0);
6471 SDValue RMValue = Op->getOperand(1);
6472
6473 // The rounding mode is in bits 23:22 of the FPSCR.
6474 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6475 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6476 // (((arg - 1) & 3) << 22).
6477 //
6478 // It is expected that the argument of llvm.set.rounding is within the
6479 // interval [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6480 // responsibility of the code that generates llvm.set.rounding to ensure this
6481 // condition.
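// Worked example (illustrative, added comment): an argument of 0 (round
// toward zero) yields ((0 - 1) & 3) == 3, the FPSCR RZ encoding, and an
// argument of 1 (round to nearest) yields 0, the FPSCR RN encoding.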
6482
6483 // Calculate new value of FPSCR[23:22].
6484 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6485 DAG.getConstant(1, DL, MVT::i32));
6486 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6487 DAG.getConstant(0x3, DL, MVT::i32));
6488 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6489 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6490
6491 // Get current value of FPSCR.
6492 SDValue Ops[] = {Chain,
6493 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6494 SDValue FPSCR =
6495 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6496 Chain = FPSCR.getValue(1);
6497 FPSCR = FPSCR.getValue(0);
6498
6499 // Put new rounding mode into FPSCR[23:22].
6500 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6501 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6502 DAG.getConstant(RMMask, DL, MVT::i32));
6503 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6504 SDValue Ops2[] = {
6505 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6506 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6507}
6508
6509SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6510 SelectionDAG &DAG) const {
6511 SDLoc DL(Op);
6512 SDValue Chain = Op->getOperand(0);
6513 SDValue Mode = Op->getOperand(1);
6514
6515 // Generate nodes to build:
6516 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6517 SDValue Ops[] = {Chain,
6518 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6519 SDValue FPSCR =
6520 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6521 Chain = FPSCR.getValue(1);
6522 FPSCR = FPSCR.getValue(0);
6523
6524 SDValue FPSCRMasked =
6525 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6526 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6527 SDValue InputMasked =
6528 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6529 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6530 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6531
6532 SDValue Ops2[] = {
6533 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6534 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6535}
6536
6537SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6538 SelectionDAG &DAG) const {
6539 SDLoc DL(Op);
6540 SDValue Chain = Op->getOperand(0);
6541
6542 // To get the default FP mode all control bits are cleared:
6543 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6544 SDValue Ops[] = {Chain,
6545 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6546 SDValue FPSCR =
6547 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6548 Chain = FPSCR.getValue(1);
6549 FPSCR = FPSCR.getValue(0);
6550
6551 SDValue FPSCRMasked = DAG.getNode(
6552 ISD::AND, DL, MVT::i32, FPSCR,
6553 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6554 SDValue Ops2[] = {Chain,
6555 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6556 FPSCRMasked};
6557 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6558}
6559
6560 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6561 const ARMSubtarget *ST) {
6562 SDLoc dl(N);
6563 EVT VT = N->getValueType(0);
6564 if (VT.isVector() && ST->hasNEON()) {
6565
6566 // Compute the least significant set bit: LSB = X & -X
6567 SDValue X = N->getOperand(0);
6568 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6569 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6570
6571 EVT ElemTy = VT.getVectorElementType();
6572
6573 if (ElemTy == MVT::i8) {
6574 // Compute with: cttz(x) = ctpop(lsb - 1)
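// e.g. (illustrative, added comment) x = 0b00001000: lsb = 8,
// lsb - 1 = 0b0111, ctpop(0b0111) = 3 = cttz(x).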
6575 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6576 DAG.getTargetConstant(1, dl, ElemTy));
6577 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6578 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6579 }
6580
6581 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6582 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6583 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
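// e.g. (illustrative, added comment) for i16 with x = 0b0100: lsb = 4,
// ctlz(4) = 13 and 15 - 13 = 2 = cttz(x).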
6584 unsigned NumBits = ElemTy.getSizeInBits();
6585 SDValue WidthMinus1 =
6586 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6587 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6588 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6589 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6590 }
6591
6592 // Compute with: cttz(x) = ctpop(lsb - 1)
6593
6594 // Compute LSB - 1.
6595 SDValue Bits;
6596 if (ElemTy == MVT::i64) {
6597 // Load constant 0xffff'ffff'ffff'ffff to register.
6598 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6599 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6600 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6601 } else {
6602 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6603 DAG.getTargetConstant(1, dl, ElemTy));
6604 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6605 }
6606 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6607 }
6608
6609 if (!ST->hasV6T2Ops())
6610 return SDValue();
6611
6612 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6613 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6614}
6615
6616 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6617 const ARMSubtarget *ST) {
6618 EVT VT = N->getValueType(0);
6619 SDLoc DL(N);
6620
6621 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6622 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6623 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6624 "Unexpected type for custom ctpop lowering");
6625
6626 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6627 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6628 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6629 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6630
6631 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
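// e.g. (illustrative, added comment) for VT = v4i32: v16i8 ctpop ->
// vpaddl.u8 to v8i16 -> vpaddl.u16 to v4i32.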
6632 unsigned EltSize = 8;
6633 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6634 while (EltSize != VT.getScalarSizeInBits()) {
6635 SmallVector<SDValue, 8> Ops;
6636 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6637 TLI.getPointerTy(DAG.getDataLayout())));
6638 Ops.push_back(Res);
6639
6640 EltSize *= 2;
6641 NumElts /= 2;
6642 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6643 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6644 }
6645
6646 return Res;
6647}
6648
6649 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6650/// operand of a vector shift operation, where all the elements of the
6651/// build_vector must have the same constant integer value.
6652static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6653 // Ignore bit_converts.
6654 while (Op.getOpcode() == ISD::BITCAST)
6655 Op = Op.getOperand(0);
6656 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6657 APInt SplatBits, SplatUndef;
6658 unsigned SplatBitSize;
6659 bool HasAnyUndefs;
6660 if (!BVN ||
6661 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6662 ElementBits) ||
6663 SplatBitSize > ElementBits)
6664 return false;
6665 Cnt = SplatBits.getSExtValue();
6666 return true;
6667}
6668
6669/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6670/// operand of a vector shift left operation. That value must be in the range:
6671/// 0 <= Value < ElementBits for a left shift; or
6672/// 0 <= Value <= ElementBits for a long left shift.
6673static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6674 assert(VT.isVector() && "vector shift count is not a vector type");
6675 int64_t ElementBits = VT.getScalarSizeInBits();
6676 if (!getVShiftImm(Op, ElementBits, Cnt))
6677 return false;
6678 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6679}
6680
6681/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6682/// operand of a vector shift right operation. For a shift opcode, the value
6683 /// is positive, but for an intrinsic the value must be negative. The
6684/// absolute value must be in the range:
6685/// 1 <= |Value| <= ElementBits for a right shift; or
6686/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6687static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6688 int64_t &Cnt) {
6689 assert(VT.isVector() && "vector shift count is not a vector type");
6690 int64_t ElementBits = VT.getScalarSizeInBits();
6691 if (!getVShiftImm(Op, ElementBits, Cnt))
6692 return false;
6693 if (!isIntrinsic)
6694 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6695 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6696 Cnt = -Cnt;
6697 return true;
6698 }
6699 return false;
6700}
6701
6702 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6703 const ARMSubtarget *ST) {
6704 EVT VT = N->getValueType(0);
6705 SDLoc dl(N);
6706 int64_t Cnt;
6707
6708 if (!VT.isVector())
6709 return SDValue();
6710
6711 // We essentially have two forms here: shift by an immediate and shift by a
6712 // vector register (there is also shift by a GPR, but that is just handled
6713 // with a tablegen pattern). We cannot easily match shift by an immediate in
6714 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6715 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6716 // signed or unsigned, and a negative shift indicates a shift right).
6717 if (N->getOpcode() == ISD::SHL) {
6718 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6719 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6720 DAG.getConstant(Cnt, dl, MVT::i32));
6721 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6722 N->getOperand(1));
6723 }
6724
6725 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6726 "unexpected vector shift opcode");
6727
6728 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6729 unsigned VShiftOpc =
6730 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6731 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6732 DAG.getConstant(Cnt, dl, MVT::i32));
6733 }
6734
6735 // Other right shifts we don't have operations for (we use a shift left by a
6736 // negative number).
6737 EVT ShiftVT = N->getOperand(1).getValueType();
6738 SDValue NegatedCount = DAG.getNode(
6739 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6740 unsigned VShiftOpc =
6741 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6742 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6743}
6744
6745 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6746 const ARMSubtarget *ST) {
6747 EVT VT = N->getValueType(0);
6748 SDLoc dl(N);
6749
6750 // We can get here for a node like i32 = ISD::SHL i32, i64
6751 if (VT != MVT::i64)
6752 return SDValue();
6753
6754 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6755 N->getOpcode() == ISD::SHL) &&
6756 "Unknown shift to lower!");
6757
6758 unsigned ShOpc = N->getOpcode();
6759 if (ST->hasMVEIntegerOps()) {
6760 SDValue ShAmt = N->getOperand(1);
6761 unsigned ShPartsOpc = ARMISD::LSLL;
6762 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6763
6764 // If the shift amount is a constant that is zero or at least 32, or is a
6765 // non-constant wider than 64 bits, then do the default lowering
6766 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6767 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6768 return SDValue();
6769
6770 // Extract the lower 32 bits of the shift amount if it's not an i32
6771 if (ShAmt->getValueType(0) != MVT::i32)
6772 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6773
6774 if (ShOpc == ISD::SRL) {
6775 if (!Con)
6776 // There is no t2LSRLr instruction so negate and perform an lsll if the
6777 // shift amount is in a register, emulating a right shift.
6778 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6779 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6780 else
6781 // Else generate an lsrl on the immediate shift amount
6782 ShPartsOpc = ARMISD::LSRL;
6783 } else if (ShOpc == ISD::SRA)
6784 ShPartsOpc = ARMISD::ASRL;
6785
6786 // Split Lower/Upper 32 bits of the destination/source
6787 SDValue Lo, Hi;
6788 std::tie(Lo, Hi) =
6789 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6790 // Generate the shift operation as computed above
6791 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6792 ShAmt);
6793 // The upper 32 bits come from the second return value of lsll
6794 Hi = SDValue(Lo.getNode(), 1);
6795 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6796 }
6797
6798 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6799 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6800 return SDValue();
6801
6802 // If we are in thumb mode, we don't have RRX.
6803 if (ST->isThumb1Only())
6804 return SDValue();
6805
6806 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
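// Illustrative sketch (added comment) for "x lshr 1":
//   hi' = LSRS hi, #1   ; carry flag := old hi[0]
//   lo' = RRX lo        ; lo' = (carry << 31) | (lo >> 1)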
6807 SDValue Lo, Hi;
6808 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6809
6810 // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
6811 // captures the shifted out bit into a carry flag.
6812 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6813 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
6814
6815 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6816 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6817
6818 // Merge the pieces into a single i64 value.
6819 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6820}
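// Illustrative sketch (hypothetical helper, not part of the lowering above;
// uses only standard types already included in this file): how the RRX
// expansion behaves at the bit level. A 64-bit logical shift right by one
// becomes a 32-bit shift of the high word that captures the dropped bit as a
// carry (LSRS #1), followed by a rotate-right-with-extend that shifts the
// carry into the top bit of the low word (RRX).
static inline uint64_t sketchLsr64ByOneInParts(uint64_t V) {
  uint32_t Lo = static_cast<uint32_t>(V);
  uint32_t Hi = static_cast<uint32_t>(V >> 32);
  uint32_t Carry = Hi & 1;          // Bit shifted out of the high word.
  Hi >>= 1;                         // LSRS #1 on the high word.
  Lo = (Lo >> 1) | (Carry << 31);   // RRX on the low word.
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}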
6821
6822static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6823 const ARMSubtarget *ST) {
6824 bool Invert = false;
6825 bool Swap = false;
6826 unsigned Opc = ARMCC::AL;
6827
6828 SDValue Op0 = Op.getOperand(0);
6829 SDValue Op1 = Op.getOperand(1);
6830 SDValue CC = Op.getOperand(2);
6831 EVT VT = Op.getValueType();
6832 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6833 SDLoc dl(Op);
6834
6835 EVT CmpVT;
6836 if (ST->hasNEON())
6837 CmpVT = VT.changeVectorElementTypeToInteger();
6838 else {
6839 assert(ST->hasMVEIntegerOps() &&
6840 "No hardware support for integer vector comparison!");
6841
6842 if (Op.getValueType().getVectorElementType() != MVT::i1)
6843 return SDValue();
6844
6845 // Make sure we expand floating point setcc to scalar if we do not have
6846 // mve.fp, so that we can handle them from there.
6847 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6848 return SDValue();
6849
6850 CmpVT = VT;
6851 }
6852
6853 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6854 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6855 // Special-case integer 64-bit equality comparisons. They aren't legal,
6856 // but they can be lowered with a few vector instructions.
6857 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6858 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6859 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6860 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6861 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6862 DAG.getCondCode(ISD::SETEQ));
6863 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6864 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6865 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6866 if (SetCCOpcode == ISD::SETNE)
6867 Merged = DAG.getNOT(dl, Merged, CmpVT);
6868 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6869 return Merged;
6870 }
6871
6872 if (CmpVT.getVectorElementType() == MVT::i64)
6873 // 64-bit comparisons are not legal in general.
6874 return SDValue();
6875
6876 if (Op1.getValueType().isFloatingPoint()) {
6877 switch (SetCCOpcode) {
6878 default: llvm_unreachable("Illegal FP comparison");
6879 case ISD::SETUNE:
6880 case ISD::SETNE:
6881 if (ST->hasMVEFloatOps()) {
6882 Opc = ARMCC::NE; break;
6883 } else {
6884 Invert = true; [[fallthrough]];
6885 }
6886 case ISD::SETOEQ:
6887 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6888 case ISD::SETOLT:
6889 case ISD::SETLT: Swap = true; [[fallthrough]];
6890 case ISD::SETOGT:
6891 case ISD::SETGT: Opc = ARMCC::GT; break;
6892 case ISD::SETOLE:
6893 case ISD::SETLE: Swap = true; [[fallthrough]];
6894 case ISD::SETOGE:
6895 case ISD::SETGE: Opc = ARMCC::GE; break;
6896 case ISD::SETUGE: Swap = true; [[fallthrough]];
6897 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6898 case ISD::SETUGT: Swap = true; [[fallthrough]];
6899 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6900 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6901 case ISD::SETONE: {
6902 // Expand this to (OLT | OGT).
6903 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6904 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6905 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6906 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6907 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6908 if (Invert)
6909 Result = DAG.getNOT(dl, Result, VT);
6910 return Result;
6911 }
6912 case ISD::SETUO: Invert = true; [[fallthrough]];
6913 case ISD::SETO: {
6914 // Expand this to (OLT | OGE).
6915 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6916 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6917 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6918 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6919 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6920 if (Invert)
6921 Result = DAG.getNOT(dl, Result, VT);
6922 return Result;
6923 }
6924 }
6925 } else {
6926 // Integer comparisons.
6927 switch (SetCCOpcode) {
6928 default: llvm_unreachable("Illegal integer comparison");
6929 case ISD::SETNE:
6930 if (ST->hasMVEIntegerOps()) {
6931 Opc = ARMCC::NE; break;
6932 } else {
6933 Invert = true; [[fallthrough]];
6934 }
6935 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6936 case ISD::SETLT: Swap = true; [[fallthrough]];
6937 case ISD::SETGT: Opc = ARMCC::GT; break;
6938 case ISD::SETLE: Swap = true; [[fallthrough]];
6939 case ISD::SETGE: Opc = ARMCC::GE; break;
6940 case ISD::SETULT: Swap = true; [[fallthrough]];
6941 case ISD::SETUGT: Opc = ARMCC::HI; break;
6942 case ISD::SETULE: Swap = true; [[fallthrough]];
6943 case ISD::SETUGE: Opc = ARMCC::HS; break;
6944 }
6945
6946 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6947 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6948 SDValue AndOp;
6949 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6950 AndOp = Op0;
6951 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6952 AndOp = Op1;
6953
6954 // Ignore bitconvert.
6955 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6956 AndOp = AndOp.getOperand(0);
6957
6958 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6959 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6960 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6961 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6962 if (!Invert)
6963 Result = DAG.getNOT(dl, Result, VT);
6964 return Result;
6965 }
6966 }
6967 }
6968
6969 if (Swap)
6970 std::swap(Op0, Op1);
6971
6972 // If one of the operands is a constant vector zero, attempt to fold the
6973 // comparison to a specialized compare-against-zero form.
6974 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6975 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6976 Opc == ARMCC::NE)) {
6977 if (Opc == ARMCC::GE)
6978 Opc = ARMCC::LE;
6979 else if (Opc == ARMCC::GT)
6980 Opc = ARMCC::LT;
6981 std::swap(Op0, Op1);
6982 }
6983
6984 SDValue Result;
6985 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6986 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6987 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6988 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6989 DAG.getConstant(Opc, dl, MVT::i32));
6990 else
6991 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6992 DAG.getConstant(Opc, dl, MVT::i32));
6993
6994 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6995
6996 if (Invert)
6997 Result = DAG.getNOT(dl, Result, VT);
6998
6999 return Result;
7000}
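// Illustrative sketch (hypothetical helper, not used by the lowering) of the
// 64-bit vector equality trick above, written for a single scalar element:
// compare the value as two i32 lanes, then AND each lane-wise result with its
// VREV64-swapped neighbour, so the element compares equal only when both
// 32-bit halves matched.
static inline uint64_t sketchScalarEq64ViaI32(uint64_t A, uint64_t B) {
  uint32_t LoEq =
      (static_cast<uint32_t>(A) == static_cast<uint32_t>(B)) ? 0xffffffffu : 0u;
  uint32_t HiEq = (static_cast<uint32_t>(A >> 32) ==
                   static_cast<uint32_t>(B >> 32)) ? 0xffffffffu : 0u;
  // VREV64.32 swaps the two 32-bit lanes of each 64-bit element, so ANDing the
  // compare result with its reversed copy merges LoEq and HiEq.
  uint32_t Lo = LoEq & HiEq;
  uint32_t Hi = HiEq & LoEq;
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}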
7001
7002static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
7003 SDValue LHS = Op.getOperand(0);
7004 SDValue RHS = Op.getOperand(1);
7005 SDValue Carry = Op.getOperand(2);
7006 SDValue Cond = Op.getOperand(3);
7007 SDLoc DL(Op);
7008
7009 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
7010
7011 // ARMISD::SUBE expects a carry, not a borrow like ISD::USUBO_CARRY, so we
7012 // have to invert the carry first.
7013 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7014 DAG.getConstant(1, DL, MVT::i32), Carry);
7015 // This converts the boolean value carry into the carry flag.
7016 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7017
7018 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
7019 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
7020
7021 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
7022 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
7023 SDValue ARMcc = DAG.getConstant(
7024 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
7025 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
7026 Cmp.getValue(1));
7027}
7028
7029/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7030/// valid vector constant for a NEON or MVE instruction with a "modified
7031/// immediate" operand (e.g., VMOV). If so, return the encoded value.
7032static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
7033 unsigned SplatBitSize, SelectionDAG &DAG,
7034 const SDLoc &dl, EVT &VT, EVT VectorVT,
7035 VMOVModImmType type) {
7036 unsigned OpCmode, Imm;
7037 bool is128Bits = VectorVT.is128BitVector();
7038
7039 // SplatBitSize is set to the smallest size that splats the vector, so a
7040 // zero vector will always have SplatBitSize == 8. However, NEON modified
7041 // immediate instructions other than VMOV do not support the 8-bit encoding
7042 // of a zero vector, and the default encoding of zero is supposed to be the
7043 // 32-bit version.
7044 if (SplatBits == 0)
7045 SplatBitSize = 32;
7046
7047 switch (SplatBitSize) {
7048 case 8:
7049 if (type != VMOVModImm)
7050 return SDValue();
7051 // Any 1-byte value is OK. Op=0, Cmode=1110.
7052 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
7053 OpCmode = 0xe;
7054 Imm = SplatBits;
7055 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
7056 break;
7057
7058 case 16:
7059 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7060 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
7061 if ((SplatBits & ~0xff) == 0) {
7062 // Value = 0x00nn: Op=x, Cmode=100x.
7063 OpCmode = 0x8;
7064 Imm = SplatBits;
7065 break;
7066 }
7067 if ((SplatBits & ~0xff00) == 0) {
7068 // Value = 0xnn00: Op=x, Cmode=101x.
7069 OpCmode = 0xa;
7070 Imm = SplatBits >> 8;
7071 break;
7072 }
7073 return SDValue();
7074
7075 case 32:
7076 // NEON's 32-bit VMOV supports splat values where:
7077 // * only one byte is nonzero, or
7078 // * the least significant byte is 0xff and the second byte is nonzero, or
7079 // * the least significant 2 bytes are 0xff and the third is nonzero.
7080 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7081 if ((SplatBits & ~0xff) == 0) {
7082 // Value = 0x000000nn: Op=x, Cmode=000x.
7083 OpCmode = 0;
7084 Imm = SplatBits;
7085 break;
7086 }
7087 if ((SplatBits & ~0xff00) == 0) {
7088 // Value = 0x0000nn00: Op=x, Cmode=001x.
7089 OpCmode = 0x2;
7090 Imm = SplatBits >> 8;
7091 break;
7092 }
7093 if ((SplatBits & ~0xff0000) == 0) {
7094 // Value = 0x00nn0000: Op=x, Cmode=010x.
7095 OpCmode = 0x4;
7096 Imm = SplatBits >> 16;
7097 break;
7098 }
7099 if ((SplatBits & ~0xff000000) == 0) {
7100 // Value = 0xnn000000: Op=x, Cmode=011x.
7101 OpCmode = 0x6;
7102 Imm = SplatBits >> 24;
7103 break;
7104 }
7105
7106 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7107 if (type == OtherModImm) return SDValue();
7108
7109 if ((SplatBits & ~0xffff) == 0 &&
7110 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7111 // Value = 0x0000nnff: Op=x, Cmode=1100.
7112 OpCmode = 0xc;
7113 Imm = SplatBits >> 8;
7114 break;
7115 }
7116
7117 // cmode == 0b1101 is not supported for MVE VMVN
7118 if (type == MVEVMVNModImm)
7119 return SDValue();
7120
7121 if ((SplatBits & ~0xffffff) == 0 &&
7122 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7123 // Value = 0x00nnffff: Op=x, Cmode=1101.
7124 OpCmode = 0xd;
7125 Imm = SplatBits >> 16;
7126 break;
7127 }
7128
7129 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7130 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7131 // VMOV.I32. A (very) minor optimization would be to replicate the value
7132 // and fall through here to test for a valid 64-bit splat. But, then the
7133 // caller would also need to check and handle the change in size.
7134 return SDValue();
7135
7136 case 64: {
7137 if (type != VMOVModImm)
7138 return SDValue();
7139 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7140 uint64_t BitMask = 0xff;
7141 unsigned ImmMask = 1;
7142 Imm = 0;
7143 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7144 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7145 Imm |= ImmMask;
7146 } else if ((SplatBits & BitMask) != 0) {
7147 return SDValue();
7148 }
7149 BitMask <<= 8;
7150 ImmMask <<= 1;
7151 }
7152
7153 // Op=1, Cmode=1110.
7154 OpCmode = 0x1e;
7155 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7156 break;
7157 }
7158
7159 default:
7160 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7161 }
7162
7163 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7164 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7165}
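// Minimal standalone sketch (assumption: 32-bit splats only, plain uint32_t
// instead of the APInt-derived values used above) of the Op/Cmode selection
// for the most common "modified immediate" case: a 32-bit splat is encodable
// when exactly one byte is non-zero, and the encoded immediate is that byte.
struct SketchModImm { uint32_t OpCmode; uint32_t Imm; };
static inline std::optional<SketchModImm> sketchEncodeVMOV32(uint32_t Splat) {
  if ((Splat & ~0xffu) == 0)       return SketchModImm{0x0, Splat};       // 0x000000nn
  if ((Splat & ~0xff00u) == 0)     return SketchModImm{0x2, Splat >> 8};  // 0x0000nn00
  if ((Splat & ~0xff0000u) == 0)   return SketchModImm{0x4, Splat >> 16}; // 0x00nn0000
  if ((Splat & ~0xff000000u) == 0) return SketchModImm{0x6, Splat >> 24}; // 0xnn000000
  return std::nullopt; // 0x0000nnff / 0x00nnffff need cmode 0b110x; others fail.
}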
7166
7167SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7168 const ARMSubtarget *ST) const {
7169 EVT VT = Op.getValueType();
7170 bool IsDouble = (VT == MVT::f64);
7171 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7172 const APFloat &FPVal = CFP->getValueAPF();
7173
7174 // Prevent floating-point constants from using literal loads
7175 // when execute-only is enabled.
7176 if (ST->genExecuteOnly()) {
7177 // We shouldn't trigger this for v6m execute-only
7178 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7179 "Unexpected architecture");
7180
7181 // If we can represent the constant as an immediate, don't lower it
7182 if (isFPImmLegal(FPVal, VT))
7183 return Op;
7184 // Otherwise, construct as integer, and move to float register
7185 APInt INTVal = FPVal.bitcastToAPInt();
7186 SDLoc DL(CFP);
7187 switch (VT.getSimpleVT().SimpleTy) {
7188 default:
7189 llvm_unreachable("Unknown floating point type!");
7190 break;
7191 case MVT::f64: {
7192 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7193 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7194 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7195 }
7196 case MVT::f32:
7197 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7198 DAG.getConstant(INTVal, DL, MVT::i32));
7199 }
7200 }
7201
7202 if (!ST->hasVFP3Base())
7203 return SDValue();
7204
7205 // Use the default (constant pool) lowering for double constants when we have
7206 // an SP-only FPU
7207 if (IsDouble && !Subtarget->hasFP64())
7208 return SDValue();
7209
7210 // Try splatting with a VMOV.f32...
7211 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7212
7213 if (ImmVal != -1) {
7214 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7215 // We have code in place to select a valid ConstantFP already, no need to
7216 // do any mangling.
7217 return Op;
7218 }
7219
7220 // It's a float and we are trying to use NEON operations where
7221 // possible. Lower it to a splat followed by an extract.
7222 SDLoc DL(Op);
7223 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7224 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7225 NewVal);
7226 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7227 DAG.getConstant(0, DL, MVT::i32));
7228 }
7229
7230 // The rest of our options are NEON only, make sure that's allowed before
7231 // proceeding..
7232 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7233 return SDValue();
7234
7235 EVT VMovVT;
7236 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7237
7238 // It wouldn't really be worth bothering for doubles except for one very
7239 // important value, which does happen to match: 0.0. So make sure we don't do
7240 // anything stupid.
7241 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7242 return SDValue();
7243
7244 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7245 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7246 VMovVT, VT, VMOVModImm);
7247 if (NewVal != SDValue()) {
7248 SDLoc DL(Op);
7249 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7250 NewVal);
7251 if (IsDouble)
7252 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7253
7254 // It's a float: cast and extract a vector element.
7255 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7256 VecConstant);
7257 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7258 DAG.getConstant(0, DL, MVT::i32));
7259 }
7260
7261 // Finally, try a VMVN.i32
7262 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7263 VT, VMVNModImm);
7264 if (NewVal != SDValue()) {
7265 SDLoc DL(Op);
7266 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7267
7268 if (IsDouble)
7269 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7270
7271 // It's a float: cast and extract a vector element.
7272 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7273 VecConstant);
7274 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7275 DAG.getConstant(0, DL, MVT::i32));
7276 }
7277
7278 return SDValue();
7279}
7280
7281// Check if a VEXT instruction can handle the shuffle mask when the
7282// vector sources of the shuffle are the same.
7283static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7284 unsigned NumElts = VT.getVectorNumElements();
7285
7286 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7287 if (M[0] < 0)
7288 return false;
7289
7290 Imm = M[0];
7291
7292 // If this is a VEXT shuffle, the immediate value is the index of the first
7293 // element. The other shuffle indices must be the successive elements after
7294 // the first one.
7295 unsigned ExpectedElt = Imm;
7296 for (unsigned i = 1; i < NumElts; ++i) {
7297 // Increment the expected index. If it wraps around, just follow it
7298 // back to index zero and keep going.
7299 ++ExpectedElt;
7300 if (ExpectedElt == NumElts)
7301 ExpectedElt = 0;
7302
7303 if (M[i] < 0) continue; // ignore UNDEF indices
7304 if (ExpectedElt != static_cast<unsigned>(M[i]))
7305 return false;
7306 }
7307
7308 return true;
7309}
7310
7311static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7312 bool &ReverseVEXT, unsigned &Imm) {
7313 unsigned NumElts = VT.getVectorNumElements();
7314 ReverseVEXT = false;
7315
7316 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7317 if (M[0] < 0)
7318 return false;
7319
7320 Imm = M[0];
7321
7322 // If this is a VEXT shuffle, the immediate value is the index of the first
7323 // element. The other shuffle indices must be the successive elements after
7324 // the first one.
7325 unsigned ExpectedElt = Imm;
7326 for (unsigned i = 1; i < NumElts; ++i) {
7327 // Increment the expected index. If it wraps around, it may still be
7328 // a VEXT but the source vectors must be swapped.
7329 ExpectedElt += 1;
7330 if (ExpectedElt == NumElts * 2) {
7331 ExpectedElt = 0;
7332 ReverseVEXT = true;
7333 }
7334
7335 if (M[i] < 0) continue; // ignore UNDEF indices
7336 if (ExpectedElt != static_cast<unsigned>(M[i]))
7337 return false;
7338 }
7339
7340 // Adjust the index value if the source operands will be swapped.
7341 if (ReverseVEXT)
7342 Imm -= NumElts;
7343
7344 return true;
7345}
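// Standalone sketch of the VEXT mask test above (plain std::vector in place of
// ArrayRef, -1 for an undef lane). For two <n x T> sources, the mask must be
// <k, k+1, ..., k+n-1> with indices taken modulo 2n; wrapping past 2n means
// the sources must be swapped, after which the start index becomes k - n.
static inline bool sketchIsVEXTMask(const std::vector<int> &M,
                                    unsigned NumElts, bool &Reverse,
                                    unsigned &Imm) {
  Reverse = false;
  if (M.size() != NumElts || M[0] < 0)
    return false;
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts * 2) {
      Expected = 0;
      Reverse = true;
    }
    if (M[i] >= 0 && static_cast<unsigned>(M[i]) != Expected)
      return false;
  }
  if (Reverse)
    Imm -= NumElts;
  return true;
}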
7346
7347static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7348 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7349 // range, then 0 is placed into the resulting vector. So pretty much any mask
7350 // of 8 elements can work here.
7351 return VT == MVT::v8i8 && M.size() == 8;
7352}
7353
7354static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7355 unsigned Index) {
7356 if (Mask.size() == Elements * 2)
7357 return Index / Elements;
7358 return Mask[Index] == 0 ? 0 : 1;
7359}
7360
7361// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7362// checking that pairs of elements in the shuffle mask represent the same index
7363// in each vector, incrementing the expected index by 2 at each step.
7364// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7365// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7366// v2={e,f,g,h}
7367// WhichResult gives the offset for each element in the mask based on which
7368// of the two results it belongs to.
7369//
7370// The transpose can be represented either as:
7371// result1 = shufflevector v1, v2, result1_shuffle_mask
7372// result2 = shufflevector v1, v2, result2_shuffle_mask
7373// where v1/v2 and the shuffle masks have the same number of elements
7374// (here WhichResult (see below) indicates which result is being checked)
7375//
7376// or as:
7377// results = shufflevector v1, v2, shuffle_mask
7378// where both results are returned in one vector and the shuffle mask has twice
7379// as many elements as v1/v2 (here WhichResult will always be 0 if true); here we
7380// want to check the low half and high half of the shuffle mask as if it were
7381// the other case.
7382static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7383 unsigned EltSz = VT.getScalarSizeInBits();
7384 if (EltSz == 64)
7385 return false;
7386
7387 unsigned NumElts = VT.getVectorNumElements();
7388 if (M.size() != NumElts && M.size() != NumElts*2)
7389 return false;
7390
7391 // If the mask is twice as long as the input vector then we need to check the
7392 // upper and lower parts of the mask with a matching value for WhichResult
7393 // FIXME: A mask with only even values will be rejected in case the first
7394 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7395 // M[0] is used to determine WhichResult
7396 for (unsigned i = 0; i < M.size(); i += NumElts) {
7397 WhichResult = SelectPairHalf(NumElts, M, i);
7398 for (unsigned j = 0; j < NumElts; j += 2) {
7399 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7400 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7401 return false;
7402 }
7403 }
7404
7405 if (M.size() == NumElts*2)
7406 WhichResult = 0;
7407
7408 return true;
7409}
7410
7411/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7412/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7413/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7414static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7415 unsigned EltSz = VT.getScalarSizeInBits();
7416 if (EltSz == 64)
7417 return false;
7418
7419 unsigned NumElts = VT.getVectorNumElements();
7420 if (M.size() != NumElts && M.size() != NumElts*2)
7421 return false;
7422
7423 for (unsigned i = 0; i < M.size(); i += NumElts) {
7424 WhichResult = SelectPairHalf(NumElts, M, i);
7425 for (unsigned j = 0; j < NumElts; j += 2) {
7426 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7427 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7428 return false;
7429 }
7430 }
7431
7432 if (M.size() == NumElts*2)
7433 WhichResult = 0;
7434
7435 return true;
7436}
7437
7438// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7439// that the mask elements are either all even and in steps of size 2 or all odd
7440// and in steps of size 2.
7441// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7442// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7443// v2={e,f,g,h}
7444// Requires checks similar to those of isVTRNMask with
7445// respect to how the results are returned.
7446static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7447 unsigned EltSz = VT.getScalarSizeInBits();
7448 if (EltSz == 64)
7449 return false;
7450
7451 unsigned NumElts = VT.getVectorNumElements();
7452 if (M.size() != NumElts && M.size() != NumElts*2)
7453 return false;
7454
7455 for (unsigned i = 0; i < M.size(); i += NumElts) {
7456 WhichResult = SelectPairHalf(NumElts, M, i);
7457 for (unsigned j = 0; j < NumElts; ++j) {
7458 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7459 return false;
7460 }
7461 }
7462
7463 if (M.size() == NumElts*2)
7464 WhichResult = 0;
7465
7466 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7467 if (VT.is64BitVector() && EltSz == 32)
7468 return false;
7469
7470 return true;
7471}
7472
7473/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7474/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7475/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7476static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7477 unsigned EltSz = VT.getScalarSizeInBits();
7478 if (EltSz == 64)
7479 return false;
7480
7481 unsigned NumElts = VT.getVectorNumElements();
7482 if (M.size() != NumElts && M.size() != NumElts*2)
7483 return false;
7484
7485 unsigned Half = NumElts / 2;
7486 for (unsigned i = 0; i < M.size(); i += NumElts) {
7487 WhichResult = SelectPairHalf(NumElts, M, i);
7488 for (unsigned j = 0; j < NumElts; j += Half) {
7489 unsigned Idx = WhichResult;
7490 for (unsigned k = 0; k < Half; ++k) {
7491 int MIdx = M[i + j + k];
7492 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7493 return false;
7494 Idx += 2;
7495 }
7496 }
7497 }
7498
7499 if (M.size() == NumElts*2)
7500 WhichResult = 0;
7501
7502 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7503 if (VT.is64BitVector() && EltSz == 32)
7504 return false;
7505
7506 return true;
7507}
7508
7509// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7510// that pairs of elements of the shufflemask represent the same index in each
7511// vector incrementing sequentially through the vectors.
7512// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7513// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7514// v2={e,f,g,h}
7515// Requires checks similar to those of isVTRNMask with respect to how the
7516// results are returned.
7517static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7518 unsigned EltSz = VT.getScalarSizeInBits();
7519 if (EltSz == 64)
7520 return false;
7521
7522 unsigned NumElts = VT.getVectorNumElements();
7523 if (M.size() != NumElts && M.size() != NumElts*2)
7524 return false;
7525
7526 for (unsigned i = 0; i < M.size(); i += NumElts) {
7527 WhichResult = SelectPairHalf(NumElts, M, i);
7528 unsigned Idx = WhichResult * NumElts / 2;
7529 for (unsigned j = 0; j < NumElts; j += 2) {
7530 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7531 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7532 return false;
7533 Idx += 1;
7534 }
7535 }
7536
7537 if (M.size() == NumElts*2)
7538 WhichResult = 0;
7539
7540 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7541 if (VT.is64BitVector() && EltSz == 32)
7542 return false;
7543
7544 return true;
7545}
7546
7547/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7548/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7549/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7550static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7551 unsigned EltSz = VT.getScalarSizeInBits();
7552 if (EltSz == 64)
7553 return false;
7554
7555 unsigned NumElts = VT.getVectorNumElements();
7556 if (M.size() != NumElts && M.size() != NumElts*2)
7557 return false;
7558
7559 for (unsigned i = 0; i < M.size(); i += NumElts) {
7560 WhichResult = SelectPairHalf(NumElts, M, i);
7561 unsigned Idx = WhichResult * NumElts / 2;
7562 for (unsigned j = 0; j < NumElts; j += 2) {
7563 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7564 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7565 return false;
7566 Idx += 1;
7567 }
7568 }
7569
7570 if (M.size() == NumElts*2)
7571 WhichResult = 0;
7572
7573 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7574 if (VT.is64BitVector() && EltSz == 32)
7575 return false;
7576
7577 return true;
7578}
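// Illustrative helper (not used by the lowering): apply a shuffle mask to two
// concatenated sources, following the VECTOR_SHUFFLE convention, to see what
// the masks accepted by the predicates above produce. For v1 = {a,b,c,d} and
// v2 = {e,f,g,h} (v4i32):
//   VTRN: {0,4,2,6} -> {a,e,c,g}     {1,5,3,7} -> {b,f,d,h}
//   VUZP: {0,2,4,6} -> {a,c,e,g}     {1,3,5,7} -> {b,d,f,h}
//   VZIP: {0,4,1,5} -> {a,e,b,f}     {2,6,3,7} -> {c,g,d,h}
template <typename T>
static std::vector<T> sketchApplyShuffle(const std::vector<T> &V1,
                                         const std::vector<T> &V2,
                                         const std::vector<int> &Mask) {
  std::vector<T> Out;
  for (int Idx : Mask) {
    if (Idx < 0)
      Out.push_back(T()); // Undef lane; any value is acceptable.
    else if (Idx < static_cast<int>(V1.size()))
      Out.push_back(V1[Idx]);
    else
      Out.push_back(V2[Idx - V1.size()]);
  }
  return Out;
}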
7579
7580/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7581/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7582static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7583 unsigned &WhichResult,
7584 bool &isV_UNDEF) {
7585 isV_UNDEF = false;
7586 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7587 return ARMISD::VTRN;
7588 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7589 return ARMISD::VUZP;
7590 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7591 return ARMISD::VZIP;
7592
7593 isV_UNDEF = true;
7594 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7595 return ARMISD::VTRN;
7596 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7597 return ARMISD::VUZP;
7598 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7599 return ARMISD::VZIP;
7600
7601 return 0;
7602}
7603
7604/// \return true if this is a reverse operation on a vector.
7605static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7606 unsigned NumElts = VT.getVectorNumElements();
7607 // Make sure the mask has the right size.
7608 if (NumElts != M.size())
7609 return false;
7610
7611 // Look for <15, ..., 3, -1, 1, 0>.
7612 for (unsigned i = 0; i != NumElts; ++i)
7613 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7614 return false;
7615
7616 return true;
7617}
7618
7619static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7620 unsigned NumElts = VT.getVectorNumElements();
7621 // Make sure the mask has the right size.
7622 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7623 return false;
7624
7625 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7626 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7627 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7628 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7629 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7630 int Ofs = Top ? 1 : 0;
7631 int Upper = SingleSource ? 0 : NumElts;
7632 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7633 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7634 return false;
7635 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7636 return false;
7637 }
7638 return true;
7639}
7640
7641static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7642 unsigned NumElts = VT.getVectorNumElements();
7643 // Make sure the mask has the right size.
7644 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7645 return false;
7646
7647 // If Top
7648 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7649 // This inserts Input2 into Input1
7650 // else if not Top
7651 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7652 // This inserts Input1 into Input2
7653 unsigned Offset = Top ? 0 : 1;
7654 unsigned N = SingleSource ? 0 : NumElts;
7655 for (unsigned i = 0; i < NumElts; i += 2) {
7656 if (M[i] >= 0 && M[i] != (int)i)
7657 return false;
7658 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7659 return false;
7660 }
7661
7662 return true;
7663}
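// Hypothetical helper (illustrative only): build the two-source masks accepted
// by isVMOVNMask above. For v8i16, Top produces <0,8,2,10,4,12,6,14> (even
// lanes of Input1 interleaved with even lanes of Input2), while !Top produces
// <0,9,2,11,4,13,6,15> (even lanes of Input1 interleaved with odd lanes of
// Input2).
static inline std::vector<int> sketchMakeVMOVNMask(unsigned NumElts, bool Top) {
  std::vector<int> M;
  unsigned Offset = Top ? 0 : 1;
  for (unsigned i = 0; i < NumElts; i += 2) {
    M.push_back(static_cast<int>(i));
    M.push_back(static_cast<int>(NumElts + i + Offset));
  }
  return M;
}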
7664
7665static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7666 unsigned NumElts = ToVT.getVectorNumElements();
7667 if (NumElts != M.size())
7668 return false;
7669
7670 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7671 // looking for patterns of:
7672 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7673 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7674
7675 unsigned Off0 = rev ? NumElts / 2 : 0;
7676 unsigned Off1 = rev ? 0 : NumElts / 2;
7677 for (unsigned i = 0; i < NumElts; i += 2) {
7678 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7679 return false;
7680 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7681 return false;
7682 }
7683
7684 return true;
7685}
7686
7687// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7688// from a pair of inputs. For example:
7689// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7690// FP_ROUND(EXTRACT_ELT(Y, 0),
7691// FP_ROUND(EXTRACT_ELT(X, 1),
7692// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7693static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7694 const ARMSubtarget *ST) {
7695 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7696 if (!ST->hasMVEFloatOps())
7697 return SDValue();
7698
7699 SDLoc dl(BV);
7700 EVT VT = BV.getValueType();
7701 if (VT != MVT::v8f16)
7702 return SDValue();
7703
7704 // We are looking for a buildvector of fptrunc elements, where all the
7705 // elements are interleavingly extracted from two sources. Check the first two
7706 // items are valid enough and extract some info from them (they are checked
7707 // properly in the loop below).
7708 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7709 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7710 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7711 return SDValue();
7712 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7713 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7714 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7715 return SDValue();
7716 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7717 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7718 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7719 return SDValue();
7720
7721 // Check all the values in the BuildVector line up with our expectations.
7722 for (unsigned i = 1; i < 4; i++) {
7723 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7724 return Trunc.getOpcode() == ISD::FP_ROUND &&
7725 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7726 Trunc.getOperand(0).getOperand(0) == Op &&
7727 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7728 };
7729 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7730 return SDValue();
7731 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7732 return SDValue();
7733 }
7734
7735 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7736 DAG.getConstant(0, dl, MVT::i32));
7737 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7738 DAG.getConstant(1, dl, MVT::i32));
7739}
7740
7741// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7742// from a single input on alternating lanes. For example:
7743// BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0),
7744// FP_EXTEND(EXTRACT_ELT(X, 2),
7745// FP_EXTEND(EXTRACT_ELT(X, 4), ...)
7746static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7747 const ARMSubtarget *ST) {
7748 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7749 if (!ST->hasMVEFloatOps())
7750 return SDValue();
7751
7752 SDLoc dl(BV);
7753 EVT VT = BV.getValueType();
7754 if (VT != MVT::v4f32)
7755 return SDValue();
7756
7757 // We are looking for a buildvector of fpext elements, where all the
7758 // elements are alternating lanes from a single source. For example <0,2,4,6>
7759 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7760 // info from them (they are checked properly in the loop below).
7761 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7762 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7763 return SDValue();
7764 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7765 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7766 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7767 return SDValue();
7768
7769 // Check all the values in the BuildVector line up with our expectations.
7770 for (unsigned i = 1; i < 4; i++) {
7771 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7772 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7773 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7774 Trunc.getOperand(0).getOperand(0) == Op &&
7775 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7776 };
7777 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7778 return SDValue();
7779 }
7780
7781 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7782 DAG.getConstant(Offset, dl, MVT::i32));
7783}
7784
7785// If N is an integer constant that can be moved into a register in one
7786// instruction, return an SDValue of such a constant (will become a MOV
7787// instruction). Otherwise return null.
7788static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7789 const ARMSubtarget *ST, const SDLoc &dl) {
7790 uint64_t Val;
7791 if (!isa<ConstantSDNode>(N))
7792 return SDValue();
7793 Val = N->getAsZExtVal();
7794
7795 if (ST->isThumb1Only()) {
7796 if (Val <= 255 || ~Val <= 255)
7797 return DAG.getConstant(Val, dl, MVT::i32);
7798 } else {
7799 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7800 return DAG.getConstant(Val, dl, MVT::i32);
7801 }
7802 return SDValue();
7803}
7804
7805static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7806 const ARMSubtarget *ST) {
7807 SDLoc dl(Op);
7808 EVT VT = Op.getValueType();
7809
7810 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7811
7812 unsigned NumElts = VT.getVectorNumElements();
7813 unsigned BoolMask;
7814 unsigned BitsPerBool;
7815 if (NumElts == 2) {
7816 BitsPerBool = 8;
7817 BoolMask = 0xff;
7818 } else if (NumElts == 4) {
7819 BitsPerBool = 4;
7820 BoolMask = 0xf;
7821 } else if (NumElts == 8) {
7822 BitsPerBool = 2;
7823 BoolMask = 0x3;
7824 } else if (NumElts == 16) {
7825 BitsPerBool = 1;
7826 BoolMask = 0x1;
7827 } else
7828 return SDValue();
7829
7830 // If this is a single value copied into all lanes (a splat), we can just sign
7831 // extend that single value
7832 SDValue FirstOp = Op.getOperand(0);
7833 if (!isa<ConstantSDNode>(FirstOp) &&
7834 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7835 return U.get().isUndef() || U.get() == FirstOp;
7836 })) {
7837 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7838 DAG.getValueType(MVT::i1));
7839 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7840 }
7841
7842 // First create base with bits set where known
7843 unsigned Bits32 = 0;
7844 for (unsigned i = 0; i < NumElts; ++i) {
7845 SDValue V = Op.getOperand(i);
7846 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7847 continue;
7848 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7849 if (BitSet)
7850 Bits32 |= BoolMask << (i * BitsPerBool);
7851 }
7852
7853 // Add in unknown nodes
7854 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7855 DAG.getConstant(Bits32, dl, MVT::i32));
7856 for (unsigned i = 0; i < NumElts; ++i) {
7857 SDValue V = Op.getOperand(i);
7858 if (isa<ConstantSDNode>(V) || V.isUndef())
7859 continue;
7860 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7861 DAG.getConstant(i, dl, MVT::i32));
7862 }
7863
7864 return Base;
7865}
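// Minimal standalone sketch of the constant part of the predicate packing
// performed above, for the v4i1 case only: the 16 predicate bits are split
// into 4 bits per lane (BitsPerBool == 4, BoolMask == 0xf), and a set lane
// replicates its boolean across its 4-bit group.
static inline uint32_t sketchPackV4I1(const bool (&Lanes)[4]) {
  const unsigned BitsPerBool = 4;
  const uint32_t BoolMask = 0xf;
  uint32_t Bits32 = 0;
  for (unsigned i = 0; i < 4; ++i)
    if (Lanes[i])
      Bits32 |= BoolMask << (i * BitsPerBool);
  return Bits32; // e.g. {true, false, true, true} -> 0xff0f
}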
7866
7867static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7868 const ARMSubtarget *ST) {
7869 if (!ST->hasMVEIntegerOps())
7870 return SDValue();
7871
7872 // We are looking for a buildvector where each element is Op[0] + i*N
7873 EVT VT = Op.getValueType();
7874 SDValue Op0 = Op.getOperand(0);
7875 unsigned NumElts = VT.getVectorNumElements();
7876
7877 // Get the increment value from operand 1
7878 SDValue Op1 = Op.getOperand(1);
7879 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7880 !isa<ConstantSDNode>(Op1.getOperand(1)))
7881 return SDValue();
7882 unsigned N = Op1.getConstantOperandVal(1);
7883 if (N != 1 && N != 2 && N != 4 && N != 8)
7884 return SDValue();
7885
7886 // Check that each other operand matches
7887 for (unsigned I = 2; I < NumElts; I++) {
7888 SDValue OpI = Op.getOperand(I);
7889 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7890 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7891 OpI.getConstantOperandVal(1) != I * N)
7892 return SDValue();
7893 }
7894
7895 SDLoc DL(Op);
7896 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7897 DAG.getConstant(N, DL, MVT::i32));
7898}
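// Standalone sketch (hypothetical, constants only; the lowering above also
// accepts a non-constant base lane) of the ramp pattern matched by
// LowerBUILD_VECTORToVIDUP: every lane i must equal Base + i*Step with Step
// one of 1, 2, 4 or 8, e.g. {x, x+4, x+8, x+12} maps onto VIDUP(x, #4).
static inline bool sketchIsVidupRamp(const std::vector<uint32_t> &Lanes,
                                     unsigned Step) {
  if (Step != 1 && Step != 2 && Step != 4 && Step != 8)
    return false;
  if (Lanes.empty())
    return false;
  for (unsigned i = 1; i < Lanes.size(); ++i)
    if (Lanes[i] != Lanes[0] + i * Step)
      return false;
  return true;
}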
7899
7900// Returns true if the operation N can be treated as qr instruction variant at
7901// operand Op.
7902static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7903 switch (N->getOpcode()) {
7904 case ISD::ADD:
7905 case ISD::MUL:
7906 case ISD::SADDSAT:
7907 case ISD::UADDSAT:
7908 case ISD::AVGFLOORS:
7909 case ISD::AVGFLOORU:
7910 return true;
7911 case ISD::SUB:
7912 case ISD::SSUBSAT:
7913 case ISD::USUBSAT:
7914 return N->getOperand(1).getNode() == Op;
7915 case ISD::INTRINSIC_WO_CHAIN:
7916 switch (N->getConstantOperandVal(0)) {
7917 case Intrinsic::arm_mve_add_predicated:
7918 case Intrinsic::arm_mve_mul_predicated:
7919 case Intrinsic::arm_mve_qadd_predicated:
7920 case Intrinsic::arm_mve_vhadd:
7921 case Intrinsic::arm_mve_hadd_predicated:
7922 case Intrinsic::arm_mve_vqdmulh:
7923 case Intrinsic::arm_mve_qdmulh_predicated:
7924 case Intrinsic::arm_mve_vqrdmulh:
7925 case Intrinsic::arm_mve_qrdmulh_predicated:
7926 case Intrinsic::arm_mve_vqdmull:
7927 case Intrinsic::arm_mve_vqdmull_predicated:
7928 return true;
7929 case Intrinsic::arm_mve_sub_predicated:
7930 case Intrinsic::arm_mve_qsub_predicated:
7931 case Intrinsic::arm_mve_vhsub:
7932 case Intrinsic::arm_mve_hsub_predicated:
7933 return N->getOperand(2).getNode() == Op;
7934 default:
7935 return false;
7936 }
7937 default:
7938 return false;
7939 }
7940}
7941
7942// If this is a case we can't handle, return null and let the default
7943// expansion code take care of it.
7944SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7945 const ARMSubtarget *ST) const {
7946 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7947 SDLoc dl(Op);
7948 EVT VT = Op.getValueType();
7949
7950 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7951 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7952
7953 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7954 return R;
7955
7956 APInt SplatBits, SplatUndef;
7957 unsigned SplatBitSize;
7958 bool HasAnyUndefs;
7959 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7960 if (SplatUndef.isAllOnes())
7961 return DAG.getUNDEF(VT);
7962
7963 // If all the users of this constant splat are qr instruction variants,
7964 // generate a vdup of the constant.
7965 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7966 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7967 all_of(BVN->users(),
7968 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7969 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7970 : SplatBitSize == 16 ? MVT::v8i16
7971 : MVT::v16i8;
7972 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7973 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7974 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7975 }
7976
7977 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7978 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7979 // Check if an immediate VMOV works.
7980 EVT VmovVT;
7981 SDValue Val =
7982 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7983 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7984
7985 if (Val.getNode()) {
7986 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7987 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7988 }
7989
7990 // Try an immediate VMVN.
7991 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7992 Val = isVMOVModifiedImm(
7993 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7994 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7995 if (Val.getNode()) {
7996 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7997 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7998 }
7999
8000 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
8001 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
8002 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
8003 if (ImmVal != -1) {
8004 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
8005 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
8006 }
8007 }
8008
8009 // If we are under MVE, generate a VDUP(constant), bitcast to the original
8010 // type.
8011 if (ST->hasMVEIntegerOps() &&
8012 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
8013 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
8014 : SplatBitSize == 16 ? MVT::v8i16
8015 : MVT::v16i8;
8016 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
8017 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
8018 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
8019 }
8020 }
8021 }
8022
8023 // Scan through the operands to see if only one value is used.
8024 //
8025 // As an optimisation, even if more than one value is used, it may be more
8026 // profitable to splat with one value and then change some lanes.
8027 //
8028 // Heuristically we decide to do this if the vector has a "dominant" value,
8029 // defined as splatted to more than half of the lanes.
8030 unsigned NumElts = VT.getVectorNumElements();
8031 bool isOnlyLowElement = true;
8032 bool usesOnlyOneValue = true;
8033 bool hasDominantValue = false;
8034 bool isConstant = true;
8035
8036 // Map of the number of times a particular SDValue appears in the
8037 // element list.
8038 DenseMap<SDValue, unsigned> ValueCounts;
8039 SDValue Value;
8040 for (unsigned i = 0; i < NumElts; ++i) {
8041 SDValue V = Op.getOperand(i);
8042 if (V.isUndef())
8043 continue;
8044 if (i > 0)
8045 isOnlyLowElement = false;
8046 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
8047 isConstant = false;
8048
8049 unsigned &Count = ValueCounts[V];
8050
8051 // Is this value dominant? (takes up more than half of the lanes)
8052 if (++Count > (NumElts / 2)) {
8053 hasDominantValue = true;
8054 Value = V;
8055 }
8056 }
8057 if (ValueCounts.size() != 1)
8058 usesOnlyOneValue = false;
8059 if (!Value.getNode() && !ValueCounts.empty())
8060 Value = ValueCounts.begin()->first;
8061
8062 if (ValueCounts.empty())
8063 return DAG.getUNDEF(VT);
8064
8065 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8066 // Keep going if we are hitting this case.
8067 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8068 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8069
8070 unsigned EltSize = VT.getScalarSizeInBits();
8071
8072 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8073 // i32 and try again.
8074 if (hasDominantValue && EltSize <= 32) {
8075 if (!isConstant) {
8076 SDValue N;
8077
8078 // If we are VDUPing a value that comes directly from a vector, that will
8079 // cause an unnecessary move to and from a GPR, where instead we could
8080 // just use VDUPLANE. We can only do this if the lane being extracted
8081 // is at a constant index, as the VDUP from lane instructions only have
8082 // constant-index forms.
8083 ConstantSDNode *constIndex;
8084 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8085 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8086 // We need to create a new undef vector to use for the VDUPLANE if the
8087 // size of the vector from which we get the value is different than the
8088 // size of the vector that we need to create. We will insert the element
8089 // such that the register coalescer will remove unnecessary copies.
8090 if (VT != Value->getOperand(0).getValueType()) {
8091 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8092 VT.getVectorNumElements();
8093 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8094 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8095 Value, DAG.getConstant(index, dl, MVT::i32)),
8096 DAG.getConstant(index, dl, MVT::i32));
8097 } else
8098 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8099 Value->getOperand(0), Value->getOperand(1));
8100 } else
8101 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8102
8103 if (!usesOnlyOneValue) {
8104 // The dominant value was splatted as 'N', but we now have to insert
8105 // all differing elements.
8106 for (unsigned I = 0; I < NumElts; ++I) {
8107 if (Op.getOperand(I) == Value)
8108 continue;
8109 SmallVector<SDValue, 3> Ops;
8110 Ops.push_back(N);
8111 Ops.push_back(Op.getOperand(I));
8112 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8113 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8114 }
8115 }
8116 return N;
8117 }
8118 if (VT.getVectorElementType().isFloatingPoint()) {
8119 SmallVector<SDValue, 8> Ops;
8120 MVT FVT = VT.getVectorElementType().getSimpleVT();
8121 assert(FVT == MVT::f32 || FVT == MVT::f16);
8122 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8123 for (unsigned i = 0; i < NumElts; ++i)
8124 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8125 Op.getOperand(i)));
8126 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8127 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8128 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8129 if (Val.getNode())
8130 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8131 }
8132 if (usesOnlyOneValue) {
8133 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8134 if (isConstant && Val.getNode())
8135 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8136 }
8137 }
8138
8139 // If all elements are constants and the case above didn't get hit, fall back
8140 // to the default expansion, which will generate a load from the constant
8141 // pool.
8142 if (isConstant)
8143 return SDValue();
8144
8145 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8146 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8147 // length <= 2.
8148 if (NumElts >= 4)
8149 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8150 return shuffle;
8151
8152 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8153 // VCVT's
8154 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8155 return VCVT;
8156 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8157 return VCVT;
8158
8159 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8160 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8161 // into two 64-bit vectors; we might discover a better way to lower it.
8162 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8163 EVT ExtVT = VT.getVectorElementType();
8164 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8165 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8166 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8167 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8168 SDValue Upper =
8169 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8170 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8171 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8172 if (Lower && Upper)
8173 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8174 }
8175
8176 // Vectors with 32- or 64-bit elements can be built by directly assigning
8177 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8178 // will be legalized.
8179 if (EltSize >= 32) {
8180 // Do the expansion with floating-point types, since that is what the VFP
8181 // registers are defined to use, and since i64 is not legal.
8182 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8183 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8184 SmallVector<SDValue, 8> Ops;
8185 for (unsigned i = 0; i < NumElts; ++i)
8186 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8187 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8188 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8189 }
8190
8191 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8192 // know the default expansion would otherwise fall back on something even
8193 // worse. For a vector with one or two non-undef values, that's
8194 // scalar_to_vector for the elements followed by a shuffle (provided the
8195 // shuffle is valid for the target) and materialization element by element
8196 // on the stack followed by a load for everything else.
8197 if (!isConstant && !usesOnlyOneValue) {
8198 SDValue Vec = DAG.getUNDEF(VT);
8199 for (unsigned i = 0 ; i < NumElts; ++i) {
8200 SDValue V = Op.getOperand(i);
8201 if (V.isUndef())
8202 continue;
8203 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8204 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8205 }
8206 return Vec;
8207 }
8208
8209 return SDValue();
8210}
8211
8212// Gather data to see if the operation can be modelled as a
8213// shuffle in combination with VEXTs.
8214SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8215 SelectionDAG &DAG) const {
8216 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8217 SDLoc dl(Op);
8218 EVT VT = Op.getValueType();
8219 unsigned NumElts = VT.getVectorNumElements();
8220
8221 struct ShuffleSourceInfo {
8222 SDValue Vec;
8223 unsigned MinElt = std::numeric_limits<unsigned>::max();
8224 unsigned MaxElt = 0;
8225
8226 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8227 // be compatible with the shuffle we intend to construct. As a result
8228 // ShuffleVec will be some sliding window into the original Vec.
8229 SDValue ShuffleVec;
8230
8231 // Code should guarantee that element i in Vec starts at element "WindowBase
8232 // + i * WindowScale in ShuffleVec".
8233 int WindowBase = 0;
8234 int WindowScale = 1;
8235
8236 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8237
8238 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8239 };
8240
8241 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8242 // node.
8243 SmallVector<ShuffleSourceInfo, 2> Sources;
8244 for (unsigned i = 0; i < NumElts; ++i) {
8245 SDValue V = Op.getOperand(i);
8246 if (V.isUndef())
8247 continue;
8248 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8249 // A shuffle can only come from building a vector from various
8250 // elements of other vectors.
8251 return SDValue();
8252 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8253 // Furthermore, shuffles require a constant mask, whereas extractelts
8254 // accept variable indices.
8255 return SDValue();
8256 }
8257
8258 // Add this element source to the list if it's not already there.
8259 SDValue SourceVec = V.getOperand(0);
8260 auto Source = llvm::find(Sources, SourceVec);
8261 if (Source == Sources.end())
8262 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8263
8264 // Update the minimum and maximum lane number seen.
8265 unsigned EltNo = V.getConstantOperandVal(1);
8266 Source->MinElt = std::min(Source->MinElt, EltNo);
8267 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8268 }
8269
8270 // Currently only do something sane when at most two source vectors
8271 // are involved.
8272 if (Sources.size() > 2)
8273 return SDValue();
8274
8275 // Find out the smallest element size among result and two sources, and use
8276 // it as element size to build the shuffle_vector.
8277 EVT SmallestEltTy = VT.getVectorElementType();
8278 for (auto &Source : Sources) {
8279 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8280 if (SrcEltTy.bitsLT(SmallestEltTy))
8281 SmallestEltTy = SrcEltTy;
8282 }
8283 unsigned ResMultiplier =
8284 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8285 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8286 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8287
8288 // If the source vector is too wide or too narrow, we may nevertheless be able
8289 // to construct a compatible shuffle either by concatenating it with UNDEF or
8290 // extracting a suitable range of elements.
8291 for (auto &Src : Sources) {
8292 EVT SrcVT = Src.ShuffleVec.getValueType();
8293
8294 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8295 uint64_t VTSize = VT.getFixedSizeInBits();
8296 if (SrcVTSize == VTSize)
8297 continue;
8298
8299 // This stage of the search produces a source with the same element type as
8300 // the original, but with a total width matching the BUILD_VECTOR output.
8301 EVT EltVT = SrcVT.getVectorElementType();
8302 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8303 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8304
8305 if (SrcVTSize < VTSize) {
8306 if (2 * SrcVTSize != VTSize)
8307 return SDValue();
8308 // We can pad out the smaller vector for free, so if it's part of a
8309 // shuffle...
8310 Src.ShuffleVec =
8311 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8312 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8313 continue;
8314 }
8315
8316 if (SrcVTSize != 2 * VTSize)
8317 return SDValue();
8318
8319 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8320 // Span too large for a VEXT to cope
8321 return SDValue();
8322 }
8323
8324 if (Src.MinElt >= NumSrcElts) {
8325 // The extraction can just take the second half
8326 Src.ShuffleVec =
8327 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8328 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8329 Src.WindowBase = -NumSrcElts;
8330 } else if (Src.MaxElt < NumSrcElts) {
8331 // The extraction can just take the first half
8332 Src.ShuffleVec =
8333 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8334 DAG.getConstant(0, dl, MVT::i32));
8335 } else {
8336 // An actual VEXT is needed
8337 SDValue VEXTSrc1 =
8338 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8339 DAG.getConstant(0, dl, MVT::i32));
8340 SDValue VEXTSrc2 =
8341 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8342 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8343
8344 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8345 VEXTSrc2,
8346 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8347 Src.WindowBase = -Src.MinElt;
8348 }
8349 }
8350
8351 // Another possible incompatibility occurs from the vector element types. We
8352 // can fix this by bitcasting the source vectors to the same type we intend
8353 // for the shuffle.
8354 for (auto &Src : Sources) {
8355 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8356 if (SrcEltTy == SmallestEltTy)
8357 continue;
8358 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8359 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8360 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8361 Src.WindowBase *= Src.WindowScale;
8362 }
8363
8364 // Final check before we try to actually produce a shuffle.
8365 LLVM_DEBUG({
8366 for (auto Src : Sources)
8367 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8368 });
8369
8370 // The stars all align, our next step is to produce the mask for the shuffle.
 8371 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
 8372 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8373 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8374 SDValue Entry = Op.getOperand(i);
8375 if (Entry.isUndef())
8376 continue;
8377
8378 auto Src = llvm::find(Sources, Entry.getOperand(0));
8379 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8380
8381 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8382 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8383 // segment.
8384 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8385 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8386 VT.getScalarSizeInBits());
8387 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8388
8389 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8390 // starting at the appropriate offset.
8391 int *LaneMask = &Mask[i * ResMultiplier];
8392
8393 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8394 ExtractBase += NumElts * (Src - Sources.begin());
8395 for (int j = 0; j < LanesDefined; ++j)
8396 LaneMask[j] = ExtractBase + j;
8397 }
8398
8399
8400 // We can't handle more than two sources. This should have already
8401 // been checked before this point.
8402 assert(Sources.size() <= 2 && "Too many sources!");
8403
8404 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8405 for (unsigned i = 0; i < Sources.size(); ++i)
8406 ShuffleOps[i] = Sources[i].ShuffleVec;
8407
8408 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8409 ShuffleOps[1], Mask, DAG);
8410 if (!Shuffle)
8411 return SDValue();
8412 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8413}
8414
 8415enum ShuffleOpCodes {
 8416 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
 8417 OP_VREV,
 8418 OP_VDUP0,
 8419 OP_VDUP1,
 8420 OP_VDUP2,
 8421 OP_VDUP3,
 8422 OP_VEXT1,
 8423 OP_VEXT2,
 8424 OP_VEXT3,
 8425 OP_VUZPL, // VUZP, left result
 8426 OP_VUZPR, // VUZP, right result
 8427 OP_VZIPL, // VZIP, left result
 8428 OP_VZIPR, // VZIP, right result
 8429 OP_VTRNL, // VTRN, left result
 8430 OP_VTRNR // VTRN, right result
 8431};
8432
8433static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8434 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8435 switch (OpNum) {
8436 case OP_COPY:
8437 case OP_VREV:
8438 case OP_VDUP0:
8439 case OP_VDUP1:
8440 case OP_VDUP2:
8441 case OP_VDUP3:
8442 return true;
8443 }
8444 return false;
8445}
8446
8447/// isShuffleMaskLegal - Targets can use this to indicate that they only
8448/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8449/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8450/// are assumed to be legal.
 8451bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
 8452 if (VT.getVectorNumElements() == 4 &&
8453 (VT.is128BitVector() || VT.is64BitVector())) {
8454 unsigned PFIndexes[4];
8455 for (unsigned i = 0; i != 4; ++i) {
8456 if (M[i] < 0)
8457 PFIndexes[i] = 8;
8458 else
8459 PFIndexes[i] = M[i];
8460 }
8461
8462 // Compute the index in the perfect shuffle table.
8463 unsigned PFTableIndex =
8464 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8465 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8466 unsigned Cost = (PFEntry >> 30);
8467
8468 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8469 return true;
8470 }
8471
8472 bool ReverseVEXT, isV_UNDEF;
8473 unsigned Imm, WhichResult;
8474
8475 unsigned EltSize = VT.getScalarSizeInBits();
8476 if (EltSize >= 32 ||
8478 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8479 isVREVMask(M, VT, 64) ||
8480 isVREVMask(M, VT, 32) ||
8481 isVREVMask(M, VT, 16))
8482 return true;
8483 else if (Subtarget->hasNEON() &&
8484 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8485 isVTBLMask(M, VT) ||
8486 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8487 return true;
8488 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8489 isReverseMask(M, VT))
8490 return true;
8491 else if (Subtarget->hasMVEIntegerOps() &&
8492 (isVMOVNMask(M, VT, true, false) ||
8493 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8494 return true;
8495 else if (Subtarget->hasMVEIntegerOps() &&
8496 (isTruncMask(M, VT, false, false) ||
8497 isTruncMask(M, VT, false, true) ||
8498 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8499 return true;
8500 else
8501 return false;
8502}
8503
8504/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8505/// the specified operations to build the shuffle.
8506static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8507 SDValue RHS, SelectionDAG &DAG,
8508 const SDLoc &dl) {
8509 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8510 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8511 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
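  // A note on the encoding decoded above (derived from these shifts, not a
  // table dump): each 32-bit PFEntry holds the cost in bits [31:30], the
  // shuffle opcode in bits [29:26] and two 13-bit operand ids in bits [25:13]
  // and [12:0]. An operand id is a 4-element mask written in base 9, so
  // (1*9+2)*9+3 == 102 names the LHS identity mask <0,1,2,3> and
  // ((4*9+5)*9+6)*9+7 == 3382 names <4,5,6,7>, i.e. the RHS, which is what
  // the OP_COPY special case below relies on.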
8512
8513 if (OpNum == OP_COPY) {
8514 if (LHSID == (1*9+2)*9+3) return LHS;
8515 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8516 return RHS;
8517 }
8518
8519 SDValue OpLHS, OpRHS;
8520 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8521 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8522 EVT VT = OpLHS.getValueType();
8523
8524 switch (OpNum) {
8525 default: llvm_unreachable("Unknown shuffle opcode!");
8526 case OP_VREV:
8527 // VREV divides the vector in half and swaps within the half.
8528 if (VT.getScalarSizeInBits() == 32)
8529 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8530 // vrev <4 x i16> -> VREV32
8531 if (VT.getScalarSizeInBits() == 16)
8532 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8533 // vrev <4 x i8> -> VREV16
8534 assert(VT.getScalarSizeInBits() == 8);
8535 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8536 case OP_VDUP0:
8537 case OP_VDUP1:
8538 case OP_VDUP2:
8539 case OP_VDUP3:
8540 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8541 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8542 case OP_VEXT1:
8543 case OP_VEXT2:
8544 case OP_VEXT3:
8545 return DAG.getNode(ARMISD::VEXT, dl, VT,
8546 OpLHS, OpRHS,
8547 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8548 case OP_VUZPL:
8549 case OP_VUZPR:
8550 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8551 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8552 case OP_VZIPL:
8553 case OP_VZIPR:
8554 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8555 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8556 case OP_VTRNL:
8557 case OP_VTRNR:
8558 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8559 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8560 }
8561}
8562
 8563static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
 8564 ArrayRef<int> ShuffleMask,
8565 SelectionDAG &DAG) {
8566 // Check to see if we can use the VTBL instruction.
8567 SDValue V1 = Op.getOperand(0);
8568 SDValue V2 = Op.getOperand(1);
8569 SDLoc DL(Op);
8570
8571 SmallVector<SDValue, 8> VTBLMask;
8572 for (int I : ShuffleMask)
8573 VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));
8574
8575 if (V2.getNode()->isUndef())
8576 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8577 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8578
8579 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8580 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8581}
8582
 8583static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 8584 SDLoc DL(Op);
8585 EVT VT = Op.getValueType();
8586
8587 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8588 "Expect an v8i16/v16i8 type");
8589 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8590 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8591 // extract the first 8 bytes into the top double word and the last 8 bytes
8592 // into the bottom double word, through a new vector shuffle that will be
8593 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
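  // For a v16i8, for example, the NewMask built below is
  // <8,9,...,15,0,1,...,7>, which swaps the two double words of the VREV64
  // result and so completes the full 128-bit reversal.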
8594 std::vector<int> NewMask;
8595 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8596 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8597 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8598 NewMask.push_back(i);
8599 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8600}
8601
 8602static EVT getVectorTyFromPredicateVector(EVT VT) {
 8603 switch (VT.getSimpleVT().SimpleTy) {
8604 case MVT::v2i1:
8605 return MVT::v2f64;
8606 case MVT::v4i1:
8607 return MVT::v4i32;
8608 case MVT::v8i1:
8609 return MVT::v8i16;
8610 case MVT::v16i1:
8611 return MVT::v16i8;
8612 default:
8613 llvm_unreachable("Unexpected vector predicate type");
8614 }
8615}
8616
 8617static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
 8618 SelectionDAG &DAG) {
8619 // Converting from boolean predicates to integers involves creating a vector
8620 // of all ones or all zeroes and selecting the lanes based upon the real
8621 // predicate.
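  // As an illustration (assuming the usual MVE predicate layout, where each
  // v4i1 lane maps to four bits of VPR.P0): a v4i1 predicate <1,0,1,0> is
  // promoted to the v4i32 vector <0xffffffff, 0x0, 0xffffffff, 0x0>, i.e.
  // every set predicate lane becomes an all-ones lane of the full-width
  // vector type.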
 8622 SDValue AllOnes =
 8623 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8624 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8625
8626 SDValue AllZeroes =
8627 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8628 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8629
8630 // Get full vector type from predicate type
 8631 EVT NewVT = getVectorTyFromPredicateVector(VT);
 8632
8633 SDValue RecastV1;
8634 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8635 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8636 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8637 // since we know in hardware the sizes are really the same.
8638 if (VT != MVT::v16i1)
8639 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8640 else
8641 RecastV1 = Pred;
8642
8643 // Select either all ones or zeroes depending upon the real predicate bits.
8644 SDValue PredAsVector =
8645 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8646
8647 // Recast our new predicate-as-integer v16i8 vector into something
8648 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8649 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8650}
8651
 8652static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
 8653 const ARMSubtarget *ST) {
8654 EVT VT = Op.getValueType();
8655 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8656 ArrayRef<int> ShuffleMask = SVN->getMask();
8657
8658 assert(ST->hasMVEIntegerOps() &&
8659 "No support for vector shuffle of boolean predicates");
8660
8661 SDValue V1 = Op.getOperand(0);
8662 SDValue V2 = Op.getOperand(1);
8663 SDLoc dl(Op);
8664 if (isReverseMask(ShuffleMask, VT)) {
8665 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8666 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8667 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8668 DAG.getConstant(16, dl, MVT::i32));
8669 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8670 }
8671
8672 // Until we can come up with optimised cases for every single vector
8673 // shuffle in existence we have chosen the least painful strategy. This is
 8674 // to essentially promote the boolean predicate to an 8-bit integer, where
8675 // each predicate represents a byte. Then we fall back on a normal integer
8676 // vector shuffle and convert the result back into a predicate vector. In
8677 // many cases the generated code might be even better than scalar code
8678 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8679 // fields in a register into 8 other arbitrary 2-bit fields!
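  // Sketch of the strategy (illustrative, nothing beyond the code below): a
  // v8i1 shuffle such as <7,6,5,4,3,2,1,0> is carried out as the equivalent
  // v8i16 shuffle of the promoted all-ones/all-zeroes vector, and the VCMPZ
  // against zero at the end of this function converts the shuffled integer
  // vector back into a v8i1 predicate.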
8680 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8681 EVT NewVT = PredAsVector1.getValueType();
8682 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8683 : PromoteMVEPredVector(dl, V2, VT, DAG);
8684 assert(PredAsVector2.getValueType() == NewVT &&
8685 "Expected identical vector type in expanded i1 shuffle!");
8686
8687 // Do the shuffle!
8688 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8689 PredAsVector2, ShuffleMask);
8690
8691 // Now return the result of comparing the shuffled vector with zero,
8692 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8693 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8694 if (VT == MVT::v2i1) {
8695 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8696 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8697 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8698 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8699 }
8700 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8701 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8702}
8703
 8704static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
 8705 ArrayRef<int> ShuffleMask,
8706 SelectionDAG &DAG) {
8707 // Attempt to lower the vector shuffle using as many whole register movs as
 8708 // possible. This is useful for types smaller than 32 bits, which would
 8709 // otherwise often become a series of GPR moves.
8710 SDLoc dl(Op);
8711 EVT VT = Op.getValueType();
8712 if (VT.getScalarSizeInBits() >= 32)
8713 return SDValue();
8714
8715 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8716 "Unexpected vector type");
8717 int NumElts = VT.getVectorNumElements();
8718 int QuarterSize = NumElts / 4;
8719 // The four final parts of the vector, as i32's
8720 SDValue Parts[4];
8721
8722 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8723 // <u,u,u,u>), returning the vmov lane index
8724 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8725 // Detect which mov lane this would be from the first non-undef element.
8726 int MovIdx = -1;
8727 for (int i = 0; i < Length; i++) {
8728 if (ShuffleMask[Start + i] >= 0) {
8729 if (ShuffleMask[Start + i] % Length != i)
8730 return -1;
8731 MovIdx = ShuffleMask[Start + i] / Length;
8732 break;
8733 }
8734 }
8735 // If all items are undef, leave this for other combines
8736 if (MovIdx == -1)
8737 return -1;
8738 // Check the remaining values are the correct part of the same mov
8739 for (int i = 1; i < Length; i++) {
8740 if (ShuffleMask[Start + i] >= 0 &&
8741 (ShuffleMask[Start + i] / Length != MovIdx ||
8742 ShuffleMask[Start + i] % Length != i))
8743 return -1;
8744 }
8745 return MovIdx;
8746 };
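  // Example (assumed mask values): for a v16i8 shuffle whose first quarter of
  // the mask is <4,5,6,7>, getMovIdx returns 1, so Parts[0] below becomes
  // lane 1 of the first input viewed as a v4f32, i.e. one whole-register lane
  // move instead of four byte moves.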
8747
8748 for (int Part = 0; Part < 4; ++Part) {
8749 // Does this part look like a mov
8750 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8751 if (Elt != -1) {
8752 SDValue Input = Op->getOperand(0);
8753 if (Elt >= 4) {
8754 Input = Op->getOperand(1);
8755 Elt -= 4;
8756 }
8757 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8758 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8759 DAG.getConstant(Elt, dl, MVT::i32));
8760 }
8761 }
8762
8763 // Nothing interesting found, just return
8764 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8765 return SDValue();
8766
8767 // The other parts need to be built with the old shuffle vector, cast to a
8768 // v4i32 and extract_vector_elts
8769 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8770 SmallVector<int, 16> NewShuffleMask;
8771 for (int Part = 0; Part < 4; ++Part)
8772 for (int i = 0; i < QuarterSize; i++)
8773 NewShuffleMask.push_back(
8774 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8775 SDValue NewShuffle = DAG.getVectorShuffle(
8776 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8777 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8778
8779 for (int Part = 0; Part < 4; ++Part)
8780 if (!Parts[Part])
8781 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8782 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8783 }
8784 // Build a vector out of the various parts and bitcast it back to the original
8785 // type.
8786 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8787 return DAG.getBitcast(VT, NewVec);
8788}
8789
 8790static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
 8791 ArrayRef<int> ShuffleMask,
8792 SelectionDAG &DAG) {
8793 SDValue V1 = Op.getOperand(0);
8794 SDValue V2 = Op.getOperand(1);
8795 EVT VT = Op.getValueType();
8796 unsigned NumElts = VT.getVectorNumElements();
8797
 8798 // A one-off identity mask is one that is mostly an identity mask from a
 8799 // single source but contains a single element out-of-place, either from a
 8800 // different vector or from another position in the same vector. As opposed to
 8801 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8802 // pair directly.
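  // For example: for a v4i32 shuffle of V1/V2 with mask <0,1,2,7>, elements
  // 0-2 are already in place in V1 and only element 3 is out of place, so
  // this lowers to an extract of lane 3 of V2 followed by an insert into
  // lane 3 of V1.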
8803 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8804 int &OffElement) {
8805 OffElement = -1;
8806 int NonUndef = 0;
8807 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8808 if (Mask[i] == -1)
8809 continue;
8810 NonUndef++;
8811 if (Mask[i] != i + BaseOffset) {
8812 if (OffElement == -1)
8813 OffElement = i;
8814 else
8815 return false;
8816 }
8817 }
8818 return NonUndef > 2 && OffElement != -1;
8819 };
8820 int OffElement;
8821 SDValue VInput;
8822 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8823 VInput = V1;
8824 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8825 VInput = V2;
8826 else
8827 return SDValue();
8828
8829 SDLoc dl(Op);
8830 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8831 ? MVT::i32
8832 : VT.getScalarType();
8833 SDValue Elt = DAG.getNode(
8834 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8835 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8836 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8837 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8838 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8839}
8840
 8841static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
 8842 const ARMSubtarget *ST) {
8843 SDValue V1 = Op.getOperand(0);
8844 SDValue V2 = Op.getOperand(1);
8845 SDLoc dl(Op);
8846 EVT VT = Op.getValueType();
8847 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8848 unsigned EltSize = VT.getScalarSizeInBits();
8849
8850 if (ST->hasMVEIntegerOps() && EltSize == 1)
8851 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8852
8853 // Convert shuffles that are directly supported on NEON to target-specific
8854 // DAG nodes, instead of keeping them as shuffles and matching them again
8855 // during code selection. This is more efficient and avoids the possibility
8856 // of inconsistencies between legalization and selection.
8857 // FIXME: floating-point vectors should be canonicalized to integer vectors
 8858 // of the same type so that they get CSEd properly.
8859 ArrayRef<int> ShuffleMask = SVN->getMask();
8860
8861 if (EltSize <= 32) {
8862 if (SVN->isSplat()) {
8863 int Lane = SVN->getSplatIndex();
8864 // If this is undef splat, generate it via "just" vdup, if possible.
8865 if (Lane == -1) Lane = 0;
8866
8867 // Test if V1 is a SCALAR_TO_VECTOR.
8868 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8869 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8870 }
8871 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8872 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8873 // reaches it).
8874 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8875 !isa<ConstantSDNode>(V1.getOperand(0))) {
8876 bool IsScalarToVector = true;
8877 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8878 if (!V1.getOperand(i).isUndef()) {
8879 IsScalarToVector = false;
8880 break;
8881 }
8882 if (IsScalarToVector)
8883 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8884 }
8885 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8886 DAG.getConstant(Lane, dl, MVT::i32));
8887 }
8888
8889 bool ReverseVEXT = false;
8890 unsigned Imm = 0;
8891 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8892 if (ReverseVEXT)
8893 std::swap(V1, V2);
8894 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8895 DAG.getConstant(Imm, dl, MVT::i32));
8896 }
8897
8898 if (isVREVMask(ShuffleMask, VT, 64))
8899 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8900 if (isVREVMask(ShuffleMask, VT, 32))
8901 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8902 if (isVREVMask(ShuffleMask, VT, 16))
8903 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8904
8905 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8906 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8907 DAG.getConstant(Imm, dl, MVT::i32));
8908 }
8909
8910 // Check for Neon shuffles that modify both input vectors in place.
8911 // If both results are used, i.e., if there are two shuffles with the same
8912 // source operands and with masks corresponding to both results of one of
8913 // these operations, DAG memoization will ensure that a single node is
8914 // used for both shuffles.
8915 unsigned WhichResult = 0;
8916 bool isV_UNDEF = false;
8917 if (ST->hasNEON()) {
8918 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8919 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8920 if (isV_UNDEF)
8921 V2 = V1;
8922 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8923 .getValue(WhichResult);
8924 }
8925 }
8926 if (ST->hasMVEIntegerOps()) {
8927 if (isVMOVNMask(ShuffleMask, VT, false, false))
8928 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8929 DAG.getConstant(0, dl, MVT::i32));
8930 if (isVMOVNMask(ShuffleMask, VT, true, false))
8931 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8932 DAG.getConstant(1, dl, MVT::i32));
8933 if (isVMOVNMask(ShuffleMask, VT, true, true))
8934 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8935 DAG.getConstant(1, dl, MVT::i32));
8936 }
8937
8938 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8939 // shuffles that produce a result larger than their operands with:
8940 // shuffle(concat(v1, undef), concat(v2, undef))
8941 // ->
8942 // shuffle(concat(v1, v2), undef)
8943 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8944 //
8945 // This is useful in the general case, but there are special cases where
8946 // native shuffles produce larger results: the two-result ops.
8947 //
8948 // Look through the concat when lowering them:
8949 // shuffle(concat(v1, v2), undef)
8950 // ->
8951 // concat(VZIP(v1, v2):0, :1)
8952 //
8953 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8954 SDValue SubV1 = V1->getOperand(0);
8955 SDValue SubV2 = V1->getOperand(1);
8956 EVT SubVT = SubV1.getValueType();
8957
8958 // We expect these to have been canonicalized to -1.
8959 assert(llvm::all_of(ShuffleMask, [&](int i) {
8960 return i < (int)VT.getVectorNumElements();
8961 }) && "Unexpected shuffle index into UNDEF operand!");
8962
8963 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8964 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8965 if (isV_UNDEF)
8966 SubV2 = SubV1;
8967 assert((WhichResult == 0) &&
8968 "In-place shuffle of concat can only have one result!");
8969 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8970 SubV1, SubV2);
8971 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8972 Res.getValue(1));
8973 }
8974 }
8975 }
8976
8977 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8978 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8979 return V;
8980
8981 for (bool Top : {false, true}) {
8982 for (bool SingleSource : {false, true}) {
8983 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8984 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8985 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8986 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8987 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8988 SingleSource ? V1 : V2);
8989 if (Top) {
8990 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8991 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8992 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8993 }
8994 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8995 }
8996 }
8997 }
8998 }
8999
9000 // If the shuffle is not directly supported and it has 4 elements, use
9001 // the PerfectShuffle-generated table to synthesize it from other shuffles.
9002 unsigned NumElts = VT.getVectorNumElements();
9003 if (NumElts == 4) {
9004 unsigned PFIndexes[4];
9005 for (unsigned i = 0; i != 4; ++i) {
9006 if (ShuffleMask[i] < 0)
9007 PFIndexes[i] = 8;
9008 else
9009 PFIndexes[i] = ShuffleMask[i];
9010 }
9011
9012 // Compute the index in the perfect shuffle table.
9013 unsigned PFTableIndex =
9014 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9015 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9016 unsigned Cost = (PFEntry >> 30);
9017
9018 if (Cost <= 4) {
9019 if (ST->hasNEON())
9020 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9021 else if (isLegalMVEShuffleOp(PFEntry)) {
9022 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9023 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9024 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
9025 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
9026 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
9027 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9028 }
9029 }
9030 }
9031
9032 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9033 if (EltSize >= 32) {
9034 // Do the expansion with floating-point types, since that is what the VFP
9035 // registers are defined to use, and since i64 is not legal.
9036 EVT EltVT = EVT::getFloatingPointVT(EltSize);
9037 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
9038 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
9039 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
 9040 SmallVector<SDValue, 8> Ops;
 9041 for (unsigned i = 0; i < NumElts; ++i) {
9042 if (ShuffleMask[i] < 0)
9043 Ops.push_back(DAG.getUNDEF(EltVT));
9044 else
9045 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
9046 ShuffleMask[i] < (int)NumElts ? V1 : V2,
9047 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9048 dl, MVT::i32)));
9049 }
9050 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9051 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9052 }
9053
9054 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9055 isReverseMask(ShuffleMask, VT))
9056 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9057
9058 if (ST->hasNEON() && VT == MVT::v8i8)
9059 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9060 return NewOp;
9061
9062 if (ST->hasMVEIntegerOps())
9063 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9064 return NewOp;
9065
9066 return SDValue();
9067}
9068
 9069static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
 9070 const ARMSubtarget *ST) {
9071 EVT VecVT = Op.getOperand(0).getValueType();
9072 SDLoc dl(Op);
9073
9074 assert(ST->hasMVEIntegerOps() &&
9075 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9076
9077 SDValue Conv =
9078 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9079 unsigned Lane = Op.getConstantOperandVal(2);
9080 unsigned LaneWidth =
 9081 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
 9082 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9083 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9084 Op.getOperand(1), DAG.getValueType(MVT::i1));
9085 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9086 DAG.getConstant(~Mask, dl, MVT::i32));
9087 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9088}
9089
9090SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9091 SelectionDAG &DAG) const {
9092 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9093 SDValue Lane = Op.getOperand(2);
9094 if (!isa<ConstantSDNode>(Lane))
9095 return SDValue();
9096
9097 SDValue Elt = Op.getOperand(1);
9098 EVT EltVT = Elt.getValueType();
9099
9100 if (Subtarget->hasMVEIntegerOps() &&
9101 Op.getValueType().getScalarSizeInBits() == 1)
9102 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9103
9104 if (getTypeAction(*DAG.getContext(), EltVT) ==
 9105 TargetLowering::TypePromoteFloat) {
 9106 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9107 // but the type system will try to do that if we don't intervene.
9108 // Reinterpret any such vector-element insertion as one with the
9109 // corresponding integer types.
9110
9111 SDLoc dl(Op);
9112
9113 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9114 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
 9115 TargetLowering::TypePromoteFloat);
 9116
9117 SDValue VecIn = Op.getOperand(0);
9118 EVT VecVT = VecIn.getValueType();
9119 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9120 VecVT.getVectorNumElements());
9121
9122 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9123 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9124 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9125 IVecIn, IElt, Lane);
9126 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9127 }
9128
9129 return Op;
9130}
9131
 9132static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
 9133 const ARMSubtarget *ST) {
9134 EVT VecVT = Op.getOperand(0).getValueType();
9135 SDLoc dl(Op);
9136
9137 assert(ST->hasMVEIntegerOps() &&
9138 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9139
9140 SDValue Conv =
9141 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9142 unsigned Lane = Op.getConstantOperandVal(1);
9143 unsigned LaneWidth =
 9144 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
 9145 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9146 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9147 return Shift;
9148}
9149
 9150static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
 9151 const ARMSubtarget *ST) {
9152 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9153 SDValue Lane = Op.getOperand(1);
9154 if (!isa<ConstantSDNode>(Lane))
9155 return SDValue();
9156
9157 SDValue Vec = Op.getOperand(0);
9158 EVT VT = Vec.getValueType();
9159
9160 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9161 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9162
9163 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9164 SDLoc dl(Op);
9165 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9166 }
9167
9168 return Op;
9169}
9170
 9171static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
 9172 const ARMSubtarget *ST) {
9173 SDLoc dl(Op);
9174 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9175 "Unexpected custom CONCAT_VECTORS lowering");
9177 "Unexpected custom CONCAT_VECTORS lowering");
9178 assert(ST->hasMVEIntegerOps() &&
9179 "CONCAT_VECTORS lowering only supported for MVE");
9180
9181 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9182 EVT Op1VT = V1.getValueType();
9183 EVT Op2VT = V2.getValueType();
9184 assert(Op1VT == Op2VT && "Operand types don't match!");
9185 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9186 "Unexpected i1 concat operations!");
9187 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9188
9189 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9190 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9191
9192 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9193 // promoted to v8i16, etc.
9194 MVT ElType =
 9195 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
 9196 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9197
9198 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9199 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9200 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9201 // ConcatVT.
9202 SDValue ConVec =
9203 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9204 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9205 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9206 }
9207
9208 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9209 // to be the right size for the destination. For example, if Op1 is v4i1
9210 // then the promoted vector is v4i32. The result of concatenation gives a
9211 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9212 // needs truncating to i16 and inserting in the result.
9213 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9214 EVT NewVT = NewV.getValueType();
9215 EVT ConcatVT = ConVec.getValueType();
9216 unsigned ExtScale = 1;
9217 if (NewVT == MVT::v2f64) {
9218 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9219 ExtScale = 2;
9220 }
9221 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9222 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9223 DAG.getIntPtrConstant(i * ExtScale, dl));
9224 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9225 DAG.getConstant(j, dl, MVT::i32));
9226 }
9227 return ConVec;
9228 };
9229 unsigned j = 0;
9230 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9231 ConVec = ExtractInto(NewV1, ConVec, j);
9232 ConVec = ExtractInto(NewV2, ConVec, j);
9233
9234 // Now return the result of comparing the subvector with zero, which will
9235 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9236 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9237 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9238 };
9239
9240 // Concat each pair of subvectors and pack into the lower half of the array.
9241 SmallVector<SDValue> ConcatOps(Op->ops());
9242 while (ConcatOps.size() > 1) {
9243 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9244 SDValue V1 = ConcatOps[I];
9245 SDValue V2 = ConcatOps[I + 1];
9246 ConcatOps[I / 2] = ConcatPair(V1, V2);
9247 }
9248 ConcatOps.resize(ConcatOps.size() / 2);
9249 }
9250 return ConcatOps[0];
9251}
9252
 9253static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
 9254 const ARMSubtarget *ST) {
9255 EVT VT = Op->getValueType(0);
9256 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9257 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9258
9259 // The only time a CONCAT_VECTORS operation can have legal types is when
9260 // two 64-bit vectors are concatenated to a 128-bit vector.
9261 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9262 "unexpected CONCAT_VECTORS");
9263 SDLoc dl(Op);
9264 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9265 SDValue Op0 = Op.getOperand(0);
9266 SDValue Op1 = Op.getOperand(1);
9267 if (!Op0.isUndef())
9268 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9269 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9270 DAG.getIntPtrConstant(0, dl));
9271 if (!Op1.isUndef())
9272 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9273 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9274 DAG.getIntPtrConstant(1, dl));
9275 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9276}
9277
 9278static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
 9279 const ARMSubtarget *ST) {
9280 SDValue V1 = Op.getOperand(0);
9281 SDValue V2 = Op.getOperand(1);
9282 SDLoc dl(Op);
9283 EVT VT = Op.getValueType();
9284 EVT Op1VT = V1.getValueType();
9285 unsigned NumElts = VT.getVectorNumElements();
9286 unsigned Index = V2->getAsZExtVal();
9287
9288 assert(VT.getScalarSizeInBits() == 1 &&
9289 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9290 assert(ST->hasMVEIntegerOps() &&
9291 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9292
9293 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9294
9295 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9296 // promoted to v8i16, etc.
9297
 9298 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
 9299
9300 if (NumElts == 2) {
9301 EVT SubVT = MVT::v4i32;
9302 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9303 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9304 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9305 DAG.getIntPtrConstant(i, dl));
9306 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9307 DAG.getConstant(j, dl, MVT::i32));
9308 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9309 DAG.getConstant(j + 1, dl, MVT::i32));
9310 }
9311 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9312 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9313 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9314 }
9315
9316 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9317 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9318 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9319 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9320 DAG.getIntPtrConstant(i, dl));
9321 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9322 DAG.getConstant(j, dl, MVT::i32));
9323 }
9324
9325 // Now return the result of comparing the subvector with zero,
9326 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9327 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9328 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9329}
9330
 9331// Turn a truncate to a predicate (an i1 vector) into icmp(and(x, 1), 0).
 9332static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
 9333 const ARMSubtarget *ST) {
9334 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9335 EVT VT = N->getValueType(0);
9336 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9337 "Expected a vector i1 type!");
9338 SDValue Op = N->getOperand(0);
9339 EVT FromVT = Op.getValueType();
9340 SDLoc DL(N);
9341
9342 SDValue And =
9343 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9344 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9345 DAG.getCondCode(ISD::SETNE));
9346}
9347
 9348static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
 9349 const ARMSubtarget *Subtarget) {
9350 if (!Subtarget->hasMVEIntegerOps())
9351 return SDValue();
9352
9353 EVT ToVT = N->getValueType(0);
9354 if (ToVT.getScalarType() == MVT::i1)
9355 return LowerTruncatei1(N, DAG, Subtarget);
9356
9357 // MVE does not have a single instruction to perform the truncation of a v4i32
9358 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9359 // Most of the instructions in MVE follow the 'Beats' system, where moving
9360 // values from different lanes is usually something that the instructions
9361 // avoid.
9362 //
9363 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
 9364 // which take the top/bottom half of a larger lane and extend it (or do the
9365 // opposite, truncating into the top/bottom lane from a larger lane). Note
9366 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9367 // bottom 16bits from each vector lane. This works really well with T/B
9368 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
 9369 // to move between lanes.
9370 //
9371 // But truncates and sext/zext are always going to be fairly common from llvm.
9372 // We have several options for how to deal with them:
9373 // - Wherever possible combine them into an instruction that makes them
9374 // "free". This includes loads/stores, which can perform the trunc as part
9375 // of the memory operation. Or certain shuffles that can be turned into
9376 // VMOVN/VMOVL.
9377 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9378 // trunc(mul(sext(a), sext(b))) may become
9379 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9380 // this case can use VMULL). This is performed in the
9381 // MVELaneInterleavingPass.
9382 // - Otherwise we have an option. By default we would expand the
9383 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9384 // registers. One for each vector lane in the vector. This can obviously be
9385 // very expensive.
9386 // - The other option is to use the fact that loads/store can extend/truncate
9387 // to turn a trunc into two truncating stack stores and a stack reload. This
9388 // becomes 3 back-to-back memory operations, but at least that is less than
9389 // all the insert/extracts.
9390 //
9391 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9392 // are either optimized where they can be, or eventually lowered into stack
9393 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
 9394 // too early, where other instructions would be better, and stops us from
9395 // having to reconstruct multiple buildvector shuffles into loads/stores.
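  // Minimal sketch of the path below (assuming no better combine applies): a
  // v8i16 = truncate v8i32 is split into two v4i32 halves and emitted as a
  // single ARMISD::MVETRUNC node; if nothing optimises that node away it is
  // eventually expanded into the truncating stack stores plus reload
  // described above.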
9396 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9397 return SDValue();
9398 EVT FromVT = N->getOperand(0).getValueType();
9399 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9400 return SDValue();
9401
9402 SDValue Lo, Hi;
9403 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9404 SDLoc DL(N);
9405 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9406}
9407
 9408static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
 9409 const ARMSubtarget *Subtarget) {
9410 if (!Subtarget->hasMVEIntegerOps())
9411 return SDValue();
9412
9413 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9414
9415 EVT ToVT = N->getValueType(0);
9416 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9417 return SDValue();
9418 SDValue Op = N->getOperand(0);
9419 EVT FromVT = Op.getValueType();
9420 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9421 return SDValue();
9422
9423 SDLoc DL(N);
9424 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9425 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9426 ExtVT = MVT::v8i16;
9427
9428 unsigned Opcode =
 9429 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
 9430 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9431 SDValue Ext1 = Ext.getValue(1);
9432
9433 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9434 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9435 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9436 }
9437
9438 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9439}
9440
9441/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9442/// element has been zero/sign-extended, depending on the isSigned parameter,
9443/// from an integer type half its size.
 9444static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
 9445 bool isSigned) {
9446 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9447 EVT VT = N->getValueType(0);
9448 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9449 SDNode *BVN = N->getOperand(0).getNode();
9450 if (BVN->getValueType(0) != MVT::v4i32 ||
9451 BVN->getOpcode() != ISD::BUILD_VECTOR)
9452 return false;
9453 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9454 unsigned HiElt = 1 - LoElt;
9455 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9456 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9457 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9458 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9459 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9460 return false;
9461 if (isSigned) {
9462 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9463 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9464 return true;
9465 } else {
9466 if (Hi0->isZero() && Hi1->isZero())
9467 return true;
9468 }
9469 return false;
9470 }
9471
9472 if (N->getOpcode() != ISD::BUILD_VECTOR)
9473 return false;
9474
9475 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9476 SDNode *Elt = N->getOperand(i).getNode();
9477 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9478 unsigned EltSize = VT.getScalarSizeInBits();
9479 unsigned HalfSize = EltSize / 2;
9480 if (isSigned) {
9481 if (!isIntN(HalfSize, C->getSExtValue()))
9482 return false;
9483 } else {
9484 if (!isUIntN(HalfSize, C->getZExtValue()))
9485 return false;
9486 }
9487 continue;
9488 }
9489 return false;
9490 }
9491
9492 return true;
9493}
9494
9495/// isSignExtended - Check if a node is a vector value that is sign-extended
9496/// or a constant BUILD_VECTOR with sign-extended elements.
 9497static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
 9498 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9499 return true;
9500 if (isExtendedBUILD_VECTOR(N, DAG, true))
9501 return true;
9502 return false;
9503}
9504
9505/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9506/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
 9507static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
 9508 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
 9509 ISD::isZEXTLoad(N))
 9510 return true;
9511 if (isExtendedBUILD_VECTOR(N, DAG, false))
9512 return true;
9513 return false;
9514}
9515
9516static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9517 if (OrigVT.getSizeInBits() >= 64)
9518 return OrigVT;
9519
9520 assert(OrigVT.isSimple() && "Expecting a simple value type");
9521
9522 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9523 switch (OrigSimpleTy) {
9524 default: llvm_unreachable("Unexpected Vector Type");
9525 case MVT::v2i8:
9526 case MVT::v2i16:
9527 return MVT::v2i32;
9528 case MVT::v4i8:
9529 return MVT::v4i16;
9530 }
9531}
9532
9533/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9534/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9535/// We insert the required extension here to get the vector to fill a D register.
 9536static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
 9537 const EVT &OrigTy,
9538 const EVT &ExtTy,
9539 unsigned ExtOpcode) {
9540 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9541 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9542 // 64-bits we need to insert a new extension so that it will be 64-bits.
9543 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9544 if (OrigTy.getSizeInBits() >= 64)
9545 return N;
9546
9547 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9548 EVT NewVT = getExtensionTo64Bits(OrigTy);
9549
9550 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9551}
9552
9553/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9554/// does not do any sign/zero extension. If the original vector is less
9555/// than 64 bits, an appropriate extension will be added after the load to
9556/// reach a total size of 64 bits. We have to add the extension separately
9557/// because ARM does not have a sign/zero extending load for vectors.
 9558static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
 9559 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9560
9561 // The load already has the right type.
9562 if (ExtendedTy == LD->getMemoryVT())
9563 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9564 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9565 LD->getMemOperand()->getFlags());
9566
9567 // We need to create a zextload/sextload. We cannot just create a load
 9568 // followed by a zext/sext node because LowerMUL is also run during normal
9569 // operation legalization where we can't create illegal types.
9570 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9571 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9572 LD->getMemoryVT(), LD->getAlign(),
9573 LD->getMemOperand()->getFlags());
9574}
9575
9576/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9577/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9578/// the unextended value. The unextended vector should be 64 bits so that it can
9579/// be used as an operand to a VMULL instruction. If the original vector size
 9580/// before extension is less than 64 bits we add an extension to resize
9581/// the vector to 64 bits.
 9582static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
 9583 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9584 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9585 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9586 N->getOperand(0)->getValueType(0),
9587 N->getValueType(0),
9588 N->getOpcode());
9589
9590 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9591 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9592 "Expected extending load");
9593
9594 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9595 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9596 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9597 SDValue extLoad =
9598 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9599 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9600
9601 return newLoad;
9602 }
9603
9604 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9605 // have been legalized as a BITCAST from v4i32.
9606 if (N->getOpcode() == ISD::BITCAST) {
9607 SDNode *BVN = N->getOperand(0).getNode();
 9608 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
 9609 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9610 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9611 return DAG.getBuildVector(
9612 MVT::v2i32, SDLoc(N),
9613 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9614 }
9615 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9616 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9617 EVT VT = N->getValueType(0);
9618 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9619 unsigned NumElts = VT.getVectorNumElements();
9620 MVT TruncVT = MVT::getIntegerVT(EltSize);
 9621 SmallVector<SDValue, 8> Ops;
 9622 SDLoc dl(N);
9623 for (unsigned i = 0; i != NumElts; ++i) {
9624 const APInt &CInt = N->getConstantOperandAPInt(i);
9625 // Element types smaller than 32 bits are not legal, so use i32 elements.
9626 // The values are implicitly truncated so sext vs. zext doesn't matter.
9627 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9628 }
9629 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9630}
9631
9632static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9633 unsigned Opcode = N->getOpcode();
9634 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9635 SDNode *N0 = N->getOperand(0).getNode();
9636 SDNode *N1 = N->getOperand(1).getNode();
9637 return N0->hasOneUse() && N1->hasOneUse() &&
9638 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9639 }
9640 return false;
9641}
9642
9643static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9644 unsigned Opcode = N->getOpcode();
9645 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9646 SDNode *N0 = N->getOperand(0).getNode();
9647 SDNode *N1 = N->getOperand(1).getNode();
9648 return N0->hasOneUse() && N1->hasOneUse() &&
9649 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9650 }
9651 return false;
9652}
9653
 9654static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
 9655 // Multiplications are only custom-lowered for 128-bit vectors so that
9656 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9657 EVT VT = Op.getValueType();
9658 assert(VT.is128BitVector() && VT.isInteger() &&
9659 "unexpected type for custom-lowering ISD::MUL");
9660 SDNode *N0 = Op.getOperand(0).getNode();
9661 SDNode *N1 = Op.getOperand(1).getNode();
9662 unsigned NewOpc = 0;
9663 bool isMLA = false;
9664 bool isN0SExt = isSignExtended(N0, DAG);
9665 bool isN1SExt = isSignExtended(N1, DAG);
9666 if (isN0SExt && isN1SExt)
9667 NewOpc = ARMISD::VMULLs;
9668 else {
9669 bool isN0ZExt = isZeroExtended(N0, DAG);
9670 bool isN1ZExt = isZeroExtended(N1, DAG);
9671 if (isN0ZExt && isN1ZExt)
9672 NewOpc = ARMISD::VMULLu;
9673 else if (isN1SExt || isN1ZExt) {
9674 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9675 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9676 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9677 NewOpc = ARMISD::VMULLs;
9678 isMLA = true;
9679 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9680 NewOpc = ARMISD::VMULLu;
9681 isMLA = true;
9682 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9683 std::swap(N0, N1);
9684 NewOpc = ARMISD::VMULLu;
9685 isMLA = true;
9686 }
9687 }
9688
9689 if (!NewOpc) {
9690 if (VT == MVT::v2i64)
9691 // Fall through to expand this. It is not legal.
9692 return SDValue();
9693 else
9694 // Other vector multiplications are legal.
9695 return Op;
9696 }
9697 }
9698
9699 // Legalize to a VMULL instruction.
9700 SDLoc DL(Op);
9701 SDValue Op0;
9702 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9703 if (!isMLA) {
9704 Op0 = SkipExtensionForVMULL(N0, DAG);
 9705 assert(Op0.getValueType().is64BitVector() &&
 9706 Op1.getValueType().is64BitVector() &&
9707 "unexpected types for extended operands to VMULL");
9708 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9709 }
9710
9711 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9712 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9713 // vmull q0, d4, d6
9714 // vmlal q0, d5, d6
9715 // is faster than
9716 // vaddl q0, d4, d5
9717 // vmovl q1, d6
9718 // vmul q0, q0, q1
9719 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9720 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9721 EVT Op1VT = Op1.getValueType();
9722 return DAG.getNode(N0->getOpcode(), DL, VT,
9723 DAG.getNode(NewOpc, DL, VT,
9724 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9725 DAG.getNode(NewOpc, DL, VT,
9726 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9727}
9728
 9729static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
 9730 SelectionDAG &DAG) {
9731 // TODO: Should this propagate fast-math-flags?
9732
9733 // Convert to float
9734 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9735 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9736 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9737 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9738 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9739 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9740 // Get reciprocal estimate.
9741 // float4 recip = vrecpeq_f32(yf);
9742 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9743 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9744 Y);
9745 // Because char has a smaller range than uchar, we can actually get away
9746 // without any newton steps. This requires that we use a weird bias
9747 // of 0xb000, however (again, this has been exhaustively tested).
9748 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9749 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9750 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9751 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9752 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9753 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9754 // Convert back to short.
9755 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9756 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9757 return X;
9758}
9759
 9760static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
 9761 SelectionDAG &DAG) {
9762 // TODO: Should this propagate fast-math-flags?
9763
9764 SDValue N2;
9765 // Convert to float.
9766 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9767 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9768 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9769 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9770 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9771 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9772
9773 // Use reciprocal estimate and one refinement step.
9774 // float4 recip = vrecpeq_f32(yf);
9775 // recip *= vrecpsq_f32(yf, recip);
9776 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9777 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9778 N1);
9779 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9780 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9781 N1, N2);
9782 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9783 // Because short has a smaller range than ushort, we can actually get away
9784 // with only a single newton step. This requires that we use a weird bias
9785 // of 89, however (again, this has been exhaustively tested).
9786 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9787 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9788 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9789 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9790 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9791 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9792 // Convert back to integer and return.
9793 // return vmovn_s32(vcvt_s32_f32(result));
9794 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9795 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9796 return N0;
9797}
9798
 9799static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
 9800 const ARMSubtarget *ST) {
9801 EVT VT = Op.getValueType();
9802 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9803 "unexpected type for custom-lowering ISD::SDIV");
9804
9805 SDLoc dl(Op);
9806 SDValue N0 = Op.getOperand(0);
9807 SDValue N1 = Op.getOperand(1);
9808 SDValue N2, N3;
9809
9810 if (VT == MVT::v8i8) {
9811 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9812 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9813
9814 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9815 DAG.getIntPtrConstant(4, dl));
9816 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9817 DAG.getIntPtrConstant(4, dl));
9818 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9819 DAG.getIntPtrConstant(0, dl));
9820 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9821 DAG.getIntPtrConstant(0, dl));
9822
9823 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9824 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9825
9826 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9827 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9828
9829 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9830 return N0;
9831 }
9832 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9833}
9834
 9835static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
 9836 const ARMSubtarget *ST) {
9837 // TODO: Should this propagate fast-math-flags?
9838 EVT VT = Op.getValueType();
9839 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9840 "unexpected type for custom-lowering ISD::UDIV");
9841
9842 SDLoc dl(Op);
9843 SDValue N0 = Op.getOperand(0);
9844 SDValue N1 = Op.getOperand(1);
9845 SDValue N2, N3;
9846
9847 if (VT == MVT::v8i8) {
9848 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9849 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9850
9851 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9852 DAG.getIntPtrConstant(4, dl));
9853 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9854 DAG.getIntPtrConstant(4, dl));
9855 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9856 DAG.getIntPtrConstant(0, dl));
9857 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9858 DAG.getIntPtrConstant(0, dl));
9859
9860 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9861 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9862
9863 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9864 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9865
9866 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9867 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9868 MVT::i32),
9869 N0);
9870 return N0;
9871 }
9872
9873 // v4i16 udiv ... Convert to float.
9874 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9875 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9876 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9877 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9878 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9879 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9880
9881 // Use reciprocal estimate and two refinement steps.
9882 // float4 recip = vrecpeq_f32(yf);
9883 // recip *= vrecpsq_f32(yf, recip);
9884 // recip *= vrecpsq_f32(yf, recip);
9885 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9886 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9887 BN1);
9888 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9889 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9890 BN1, N2);
9891 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9892 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9893 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9894 BN1, N2);
9895 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9896 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9897 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9898 // and that it will never cause us to return an answer too large).
9899 // float4 result = as_float4(as_int4(xf*recip) + 2);
9900 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9901 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9902 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9903 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9904 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9905 // Convert back to integer and return.
9906 // return vmovn_u32(vcvt_s32_f32(result));
9907 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9908 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9909 return N0;
9910}
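// [Editor's note] For the unsigned v4i16 path the full 16-bit range of the
// divisor must be covered, so two vrecps refinement steps are used instead of
// one, and the bias added before converting back is only 2 ulps. Roughly:
//   r = vrecpe(d); r *= vrecps(d, r); r *= vrecps(d, r);
//   q = trunc(as_float(as_int(x * r) + 2));
// which is the sequence built above out of INTRINSIC_WO_CHAIN nodes. The v8i8
// case instead widens to v8i16 and reuses the signed v4i16 helper, since
// zero-extended 8-bit values always fit in a signed 16-bit lane.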
9911
9912 static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9913 SDNode *N = Op.getNode();
9914 EVT VT = N->getValueType(0);
9915 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9916
9917 SDValue Carry = Op.getOperand(2);
9918
9919 SDLoc DL(Op);
9920
9921 SDValue Result;
9922 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9923 // This converts the boolean value carry into the carry flag.
9924 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9925
9926 // Do the addition proper using the carry flag we wanted.
9927 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9928 Op.getOperand(1), Carry);
9929
9930 // Now convert the carry flag into a boolean value.
9931 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9932 } else {
9933 // ARMISD::SUBE expects a carry, not a borrow like ISD::USUBO_CARRY, so we
9934 // have to invert the incoming carry first.
9935 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9936 DAG.getConstant(1, DL, MVT::i32), Carry);
9937 // This converts the boolean value carry into the carry flag.
9938 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9939
9940 // Do the subtraction proper using the carry flag we wanted.
9941 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9942 Op.getOperand(1), Carry);
9943
9944 // Now convert the carry flag into a boolean value.
9945 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9946 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9947 // by ISD::USUBO_CARRY, so compute 1 - C.
9948 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9949 DAG.getConstant(1, DL, MVT::i32), Carry);
9950 }
9951
9952 // Return both values.
9953 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9954}
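// [Editor's note] ARM's flag convention for subtraction is "carry means no
// borrow": SUBS/SBCS set C = 1 when no borrow occurred. ISD::USUBO_CARRY uses
// the opposite convention (1 means a borrow happened), hence the two
// "1 - Carry" computations above: once on the way in (boolean borrow -> ARM
// carry) and once on the way out (ARM carry -> boolean borrow). For example,
// 0 - 1 clears the ARM carry flag (a borrow), which is reported back to the
// DAG as a borrow value of 1.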
9955
9956SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9957 assert(Subtarget->isTargetDarwin());
9958
9959 // For iOS, we want to call an alternative entry point: __sincos_stret,
9960 // whose return values are passed via sret.
9961 SDLoc dl(Op);
9962 SDValue Arg = Op.getOperand(0);
9963 EVT ArgVT = Arg.getValueType();
9964 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9965 auto PtrVT = getPointerTy(DAG.getDataLayout());
9966
9967 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9968 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9969
9970 // Pair of floats / doubles used to pass the result.
9971 Type *RetTy = StructType::get(ArgTy, ArgTy);
9972 auto &DL = DAG.getDataLayout();
9973
9974 ArgListTy Args;
9975 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9976 SDValue SRet;
9977 if (ShouldUseSRet) {
9978 // Create stack object for sret.
9979 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9980 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9981 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9982 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9983
9984 ArgListEntry Entry;
9985 Entry.Node = SRet;
9986 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9987 Entry.IsSExt = false;
9988 Entry.IsZExt = false;
9989 Entry.IsSRet = true;
9990 Args.push_back(Entry);
9991 RetTy = Type::getVoidTy(*DAG.getContext());
9992 }
9993
9994 ArgListEntry Entry;
9995 Entry.Node = Arg;
9996 Entry.Ty = ArgTy;
9997 Entry.IsSExt = false;
9998 Entry.IsZExt = false;
9999 Args.push_back(Entry);
10000
10001 RTLIB::Libcall LC =
10002 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
10003 const char *LibcallName = getLibcallName(LC);
10004 CallingConv::ID CC = getLibcallCallingConv(LC);
10005 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
10006
10007 TargetLowering::CallLoweringInfo CLI(DAG);
10008 CLI.setDebugLoc(dl)
10009 .setChain(DAG.getEntryNode())
10010 .setCallee(CC, RetTy, Callee, std::move(Args))
10011 .setDiscardResult(ShouldUseSRet);
10012 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10013
10014 if (!ShouldUseSRet)
10015 return CallResult.first;
10016
10017 SDValue LoadSin =
10018 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
10019
10020 // Address of cos field.
10021 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
10022 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
10023 SDValue LoadCos =
10024 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
10025
10026 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
10027 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
10028 LoadSin.getValue(0), LoadCos.getValue(0));
10029}
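// [Editor's note] On Darwin the combined sin/cos entry points (typically
// __sincosf_stret for f32 and __sincos_stret for f64, as selected through
// RTLIB::SINCOS_STRET_F32/F64 above) compute both results in one call. When
// the target uses the APCS ABI, the {sin, cos} pair is returned indirectly
// through the sret stack slot created above and reloaded with the two scalar
// loads; otherwise the pair comes back directly as the call result.
// Conceptually: struct { float s, c; } r = __sincosf_stret(x);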
10030
10031SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
10032 bool Signed,
10033 SDValue &Chain) const {
10034 EVT VT = Op.getValueType();
10035 assert((VT == MVT::i32 || VT == MVT::i64) &&
10036 "unexpected type for custom lowering DIV");
10037 SDLoc dl(Op);
10038
10039 const auto &DL = DAG.getDataLayout();
10040 const auto &TLI = DAG.getTargetLoweringInfo();
10041
10042 const char *Name = nullptr;
10043 if (Signed)
10044 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
10045 else
10046 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
10047
10048 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
10049
10050 ARMTargetLowering::ArgListTy Args;
10051
10052 for (auto AI : {1, 0}) {
10053 ArgListEntry Arg;
10054 Arg.Node = Op.getOperand(AI);
10055 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10056 Args.push_back(Arg);
10057 }
10058
10059 CallLoweringInfo CLI(DAG);
10060 CLI.setDebugLoc(dl)
10061 .setChain(Chain)
10063 ES, std::move(Args));
10064
10065 return LowerCallTo(CLI).first;
10066}
10067
10068 // This is a code size optimisation: return the original SDIV node to
10069 // DAGCombiner when we don't want to expand SDIV into a sequence of
10070 // instructions, and an empty SDValue otherwise, which causes the
10071 // SDIV to be expanded in DAGCombine.
10072SDValue
10073ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10074 SelectionDAG &DAG,
10075 SmallVectorImpl<SDNode *> &Created) const {
10076 // TODO: Support SREM
10077 if (N->getOpcode() != ISD::SDIV)
10078 return SDValue();
10079
10080 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10081 const bool MinSize = ST.hasMinSize();
10082 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10083 : ST.hasDivideInARMMode();
10084
10085 // Don't touch vector types; rewriting this may lead to scalarizing
10086 // the int divs.
10087 if (N->getOperand(0).getValueType().isVector())
10088 return SDValue();
10089
10090 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10091 // hwdiv support for this to be really profitable.
10092 if (!(MinSize && HasDivide))
10093 return SDValue();
10094
10095 // ARM mode is a bit simpler than Thumb: we can handle large power
10096 // of 2 immediates with 1 mov instruction; no further checks required,
10097 // just return the sdiv node.
10098 if (!ST.isThumb())
10099 return SDValue(N, 0);
10100
10101 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10102 // and thus lose the code size benefit of a 2-byte MOVS.
10103 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10104 // but as it's doing exactly this, it's not worth the trouble to get TTI.
10105 if (Divisor.sgt(128))
10106 return SDValue();
10107
10108 return SDValue(N, 0);
10109}
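// [Editor's note] The intent of the checks above, in order: only keep the
// SDIV when optimising for minimum size *and* a hardware divider exists (so
// the unexpanded node stays a single divide instruction); never do this for
// vectors; and in Thumb only keep divisors up to 128, so the immediate still
// fits a 2-byte MOVS rather than a 4-byte wide MOV. For example, "x / 128"
// under minsize on a Thumb-2 core with hardware divide is typically left as a
// movs #128 + sdiv pair instead of the usual shift/add expansion.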
10110
10111SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10112 bool Signed) const {
10113 assert(Op.getValueType() == MVT::i32 &&
10114 "unexpected type for custom lowering DIV");
10115 SDLoc dl(Op);
10116
10117 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10118 DAG.getEntryNode(), Op.getOperand(1));
10119
10120 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10121}
10122
10123 static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10124 SDLoc DL(N);
10125 SDValue Op = N->getOperand(1);
10126 if (N->getValueType(0) == MVT::i32)
10127 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10128 SDValue Lo, Hi;
10129 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10130 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10131 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10132}
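// [Editor's note] WIN__DBZCHK emits the Windows divide-by-zero check on the
// denominator before the __rt_*div library call. For a 64-bit denominator the
// value is split into its two 32-bit halves and OR-ed together first, since
// (Lo | Hi) == 0 holds exactly when the full 64-bit value is zero, so a single
// 32-bit check suffices.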
10133
10134void ARMTargetLowering::ExpandDIV_Windows(
10135 SDValue Op, SelectionDAG &DAG, bool Signed,
10136 SmallVectorImpl<SDValue> &Results) const {
10137 const auto &DL = DAG.getDataLayout();
10138 const auto &TLI = DAG.getTargetLoweringInfo();
10139
10140 assert(Op.getValueType() == MVT::i64 &&
10141 "unexpected type for custom lowering DIV");
10142 SDLoc dl(Op);
10143
10144 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10145
10146 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10147
10148 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10149 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10150 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10151 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10152
10153 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10154}
10155
10156 static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10157 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10158 EVT MemVT = LD->getMemoryVT();
10159 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10160 MemVT == MVT::v16i1) &&
10161 "Expected a predicate type!");
10162 assert(MemVT == Op.getValueType());
10163 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10164 "Expected a non-extending load");
10165 assert(LD->isUnindexed() && "Expected an unindexed load");
10166
10167 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10168 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10169 // need to make sure that the 2/4/8 bits are actually loaded into the correct
10170 // place, which means loading the value and then shuffling it into the
10171 // bottom bits of the predicate.
10172 // Equally, a VLDR for a v16i1 would actually load 32 bits (and so would be
10173 // incorrect for BE).
10174 // As for BE, the rest of LLVM assumes the reverse order of a natural
10175 // VMSR(load), so the value needs to be reversed.
10176
10177 SDLoc dl(Op);
10178 SDValue Load = DAG.getExtLoad(
10179 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10180 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10181 LD->getMemOperand());
10182 SDValue Val = Load;
10183 if (DAG.getDataLayout().isBigEndian())
10184 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10185 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10186 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10187 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10188 if (MemVT != MVT::v16i1)
10189 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10190 DAG.getConstant(0, dl, MVT::i32));
10191 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10192}
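// [Editor's note] Concretely: an in-memory v4i1 occupies only 4 bits, while
// the 16-bit MVE predicate register assigns 4 bits to each of its 4 lanes.
// The extending i32 load above pulls the stored bits into the low end of a
// GPR, PREDICATE_CAST moves that value into the predicate register as v16i1,
// and the EXTRACT_SUBVECTOR at index 0 then yields the v4i1 (or v8i1/v2i1)
// view. The BITREVERSE + SRL pair handles the reversed bit order expected on
// big-endian targets, as the comment above describes.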
10193
10194void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10195 SelectionDAG &DAG) const {
10196 LoadSDNode *LD = cast<LoadSDNode>(N);
10197 EVT MemVT = LD->getMemoryVT();
10198 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10199
10200 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10201 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10202 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10203 SDLoc dl(N);
10204 SDValue Result = DAG.getMemIntrinsicNode(
10205 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10206 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10207 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10208 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10209 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10210 Results.append({Pair, Result.getValue(2)});
10211 }
10212}
10213
10214 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10215 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10216 EVT MemVT = ST->getMemoryVT();
10217 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10218 MemVT == MVT::v16i1) &&
10219 "Expected a predicate type!");
10220 assert(MemVT == ST->getValue().getValueType());
10221 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10222 assert(ST->isUnindexed() && "Expected an unindexed store");
10223
10224 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10225 // top bits unset and a scalar store.
10226 SDLoc dl(Op);
10227 SDValue Build = ST->getValue();
10228 if (MemVT != MVT::v16i1) {
10229 SmallVector<SDValue, 16> Ops;
10230 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10231 unsigned Elt = DAG.getDataLayout().isBigEndian()
10232 ? MemVT.getVectorNumElements() - I - 1
10233 : I;
10234 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10235 DAG.getConstant(Elt, dl, MVT::i32)));
10236 }
10237 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10238 Ops.push_back(DAG.getUNDEF(MVT::i32));
10239 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10240 }
10241 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10242 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10243 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10244 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10245 DAG.getConstant(16, dl, MVT::i32));
10246 return DAG.getTruncStore(
10247 ST->getChain(), dl, GRP, ST->getBasePtr(),
10248 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10249 ST->getMemOperand());
10250}
10251
10252 static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10253 const ARMSubtarget *Subtarget) {
10254 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10255 EVT MemVT = ST->getMemoryVT();
10256 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10257
10258 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10259 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10260 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10261 SDNode *N = Op.getNode();
10262 SDLoc dl(N);
10263
10264 SDValue Lo = DAG.getNode(
10265 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10266 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10267 MVT::i32));
10268 SDValue Hi = DAG.getNode(
10269 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10270 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10271 MVT::i32));
10272
10273 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10274 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10275 MemVT, ST->getMemOperand());
10276 } else if (Subtarget->hasMVEIntegerOps() &&
10277 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10278 MemVT == MVT::v16i1))) {
10279 return LowerPredicateStore(Op, DAG);
10280 }
10281
10282 return SDValue();
10283}
10284
10285static bool isZeroVector(SDValue N) {
10286 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10287 (N->getOpcode() == ARMISD::VMOVIMM &&
10288 isNullConstant(N->getOperand(0))));
10289}
10290
10291 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10292 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10293 MVT VT = Op.getSimpleValueType();
10294 SDValue Mask = N->getMask();
10295 SDValue PassThru = N->getPassThru();
10296 SDLoc dl(Op);
10297
10298 if (isZeroVector(PassThru))
10299 return Op;
10300
10301 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10302 // zero too, and other values are lowered to a select.
10303 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10304 DAG.getTargetConstant(0, dl, MVT::i32));
10305 SDValue NewLoad = DAG.getMaskedLoad(
10306 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10307 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10308 N->getExtensionType(), N->isExpandingLoad());
10309 SDValue Combo = NewLoad;
10310 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10311 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10312 isZeroVector(PassThru->getOperand(0));
10313 if (!PassThru.isUndef() && !PassThruIsCastZero)
10314 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10315 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10316}
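// [Editor's note] MVE masked loads always produce zero in the disabled lanes,
// so the only passthru values that are free are zero (an all-zeros build
// vector, a zero VMOVIMM, or a cast of one) and undef. Anything else is
// handled by loading with a zero passthru and then blending the requested
// passthru back in with a VSELECT on the same mask, as done above.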
10317
10318 static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10319 const ARMSubtarget *ST) {
10320 if (!ST->hasMVEIntegerOps())
10321 return SDValue();
10322
10323 SDLoc dl(Op);
10324 unsigned BaseOpcode = 0;
10325 switch (Op->getOpcode()) {
10326 default: llvm_unreachable("Expected VECREDUCE opcode");
10327 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10328 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10329 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10330 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10331 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10332 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10333 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10334 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10335 }
10336
10337 SDValue Op0 = Op->getOperand(0);
10338 EVT VT = Op0.getValueType();
10339 EVT EltVT = VT.getVectorElementType();
10340 unsigned NumElts = VT.getVectorNumElements();
10341 unsigned NumActiveLanes = NumElts;
10342
10343 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10344 NumActiveLanes == 2) &&
10345 "Only expected a power 2 vector size");
10346
10347 // Combine Op0 with Rev(Op0) until 4 elements remain. Going down to 4 vector
10348 // elements allows us to easily extract the vector elements from the lanes.
10349 while (NumActiveLanes > 4) {
10350 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10351 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10352 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10353 NumActiveLanes /= 2;
10354 }
10355
10356 SDValue Res;
10357 if (NumActiveLanes == 4) {
10358 // The remaining 4 elements are reduced sequentially.
10359 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10360 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10361 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10362 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10363 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10364 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10365 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10366 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10367 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10368 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10369 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10370 } else {
10371 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10372 DAG.getConstant(0, dl, MVT::i32));
10373 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10374 DAG.getConstant(1, dl, MVT::i32));
10375 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10376 }
10377
10378 // Result type may be wider than element type.
10379 if (EltVT != Op->getValueType(0))
10380 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10381 return Res;
10382}
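// [Editor's note] A sketch of the reduction tree above for a v8i16 multiply
// reduction: VREV32 swaps each pair of 16-bit lanes, so Op0 * VREV32(Op0)
// combines lanes {0,1}, {2,3}, ... leaving 4 useful results (one more
// Op(X, Rev(X)) round is needed for v16i8). Once only 4 (or 2) active lanes
// remain they are extracted as scalars and folded with the scalar BaseOpcode,
// keeping the bulk of the reduction in vector registers until the final
// extracts.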
10383
10384 static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10385 const ARMSubtarget *ST) {
10386 if (!ST->hasMVEFloatOps())
10387 return SDValue();
10388 return LowerVecReduce(Op, DAG, ST);
10389}
10390
10391 static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10392 const ARMSubtarget *ST) {
10393 if (!ST->hasNEON())
10394 return SDValue();
10395
10396 SDLoc dl(Op);
10397 SDValue Op0 = Op->getOperand(0);
10398 EVT VT = Op0.getValueType();
10399 EVT EltVT = VT.getVectorElementType();
10400
10401 unsigned PairwiseIntrinsic = 0;
10402 switch (Op->getOpcode()) {
10403 default:
10404 llvm_unreachable("Expected VECREDUCE opcode");
10405 case ISD::VECREDUCE_UMIN:
10406 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10407 break;
10408 case ISD::VECREDUCE_UMAX:
10409 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10410 break;
10411 case ISD::VECREDUCE_SMIN:
10412 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10413 break;
10414 case ISD::VECREDUCE_SMAX:
10415 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10416 break;
10417 }
10418 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10419
10420 unsigned NumElts = VT.getVectorNumElements();
10421 unsigned NumActiveLanes = NumElts;
10422
10423 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10424 NumActiveLanes == 2) &&
10425 "Only expected a power 2 vector size");
10426
10427 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10428 if (VT.is128BitVector()) {
10429 SDValue Lo, Hi;
10430 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10431 VT = Lo.getValueType();
10432 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10433 NumActiveLanes /= 2;
10434 }
10435
10436 // Use pairwise reductions until one lane remains
10437 while (NumActiveLanes > 1) {
10438 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10439 NumActiveLanes /= 2;
10440 }
10441
10442 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10443 DAG.getConstant(0, dl, MVT::i32));
10444
10445 // Result type may be wider than element type.
10446 if (EltVT != Op.getValueType()) {
10447 unsigned Extend = 0;
10448 switch (Op->getOpcode()) {
10449 default:
10450 llvm_unreachable("Expected VECREDUCE opcode");
10451 case ISD::VECREDUCE_UMIN:
10452 case ISD::VECREDUCE_UMAX:
10453 Extend = ISD::ZERO_EXTEND;
10454 break;
10455 case ISD::VECREDUCE_SMIN:
10456 case ISD::VECREDUCE_SMAX:
10457 Extend = ISD::SIGN_EXTEND;
10458 break;
10459 }
10460 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10461 }
10462 return Res;
10463}
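// [Editor's note] Example for a NEON v4i32 VECREDUCE_UMAX: the 128-bit input
// is split into two 64-bit halves and combined with one vpmaxu.u32, a single
// further pairwise step on the 64-bit result reduces the remaining two lanes,
// and lane 0 then holds the final maximum. The zero/sign extend at the end
// only matters when the reduction's scalar result type is wider than the
// vector element type.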
10464
10465 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10466 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10467 // Acquire/Release load/store is not legal for targets without a dmb or
10468 // equivalent available.
10469 return SDValue();
10470
10471 // Monotonic load/store is legal for all targets.
10472 return Op;
10473}
10474
10475 static void ReplaceREADCYCLECOUNTER(SDNode *N,
10476 SmallVectorImpl<SDValue> &Results,
10477 SelectionDAG &DAG,
10478 const ARMSubtarget *Subtarget) {
10479 SDLoc DL(N);
10480 // Under Power Management extensions, the cycle-count is:
10481 // mrc p15, #0, <Rt>, c9, c13, #0
10482 SDValue Ops[] = { N->getOperand(0), // Chain
10483 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10484 DAG.getTargetConstant(15, DL, MVT::i32),
10485 DAG.getTargetConstant(0, DL, MVT::i32),
10486 DAG.getTargetConstant(9, DL, MVT::i32),
10487 DAG.getTargetConstant(13, DL, MVT::i32),
10488 DAG.getTargetConstant(0, DL, MVT::i32)
10489 };
10490
10491 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10492 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10493 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10494 DAG.getConstant(0, DL, MVT::i32)));
10495 Results.push_back(Cycles32.getValue(1));
10496}
10497
10498 static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
10499 SDValue V1) {
10500 SDLoc dl(V0.getNode());
10501 SDValue RegClass =
10502 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10503 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10504 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10505 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10506 return SDValue(
10507 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10508}
10509
10510 static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
10511 SDLoc dl(V.getNode());
10512 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10513 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10514 if (isBigEndian)
10515 std::swap(VLo, VHi);
10516 return createGPRPairNode2xi32(DAG, VLo, VHi);
10517}
10518
10519 static void ReplaceCMP_SWAP_64Results(SDNode *N,
10520 SmallVectorImpl<SDValue> &Results,
10521 SelectionDAG &DAG) {
10522 assert(N->getValueType(0) == MVT::i64 &&
10523 "AtomicCmpSwap on types less than 64 should be legal");
10524 SDValue Ops[] = {
10525 createGPRPairNode2xi32(DAG, N->getOperand(1),
10526 DAG.getUNDEF(MVT::i32)), // pointer, temp
10527 createGPRPairNodei64(DAG, N->getOperand(2)), // expected
10528 createGPRPairNodei64(DAG, N->getOperand(3)), // new
10529 N->getOperand(0), // chain in
10530 };
10531 SDNode *CmpSwap = DAG.getMachineNode(
10532 ARM::CMP_SWAP_64, SDLoc(N),
10533 DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
10534
10535 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10536 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10537
10538 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10539
10540 SDValue Lo =
10541 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10542 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10543 SDValue Hi =
10544 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10545 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10546 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10547 Results.push_back(SDValue(CmpSwap, 2));
10548}
10549
10550SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10551 SDLoc dl(Op);
10552 EVT VT = Op.getValueType();
10553 SDValue Chain = Op.getOperand(0);
10554 SDValue LHS = Op.getOperand(1);
10555 SDValue RHS = Op.getOperand(2);
10556 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10557 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10558
10559 // If we don't have instructions of this float type then soften to a libcall
10560 // and use SETCC instead.
10561 if (isUnsupportedFloatingType(LHS.getValueType())) {
10562 softenSetCCOperands(
10563 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10564 if (!RHS.getNode()) {
10565 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10566 CC = ISD::SETNE;
10567 }
10568 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10569 DAG.getCondCode(CC));
10570 return DAG.getMergeValues({Result, Chain}, dl);
10571 }
10572
10573 ARMCC::CondCodes CondCode, CondCode2;
10574 FPCCToARMCC(CC, CondCode, CondCode2);
10575
10576 SDValue True = DAG.getConstant(1, dl, VT);
10577 SDValue False = DAG.getConstant(0, dl, VT);
10578 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10579 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10580 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10581 if (CondCode2 != ARMCC::AL) {
10582 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10583 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10584 }
10585 return DAG.getMergeValues({Result, Chain}, dl);
10586}
10587
10588SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10589 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10590
10591 EVT VT = getPointerTy(DAG.getDataLayout());
10592 SDLoc DL(Op);
10593 int FI = MFI.CreateFixedObject(4, 0, false);
10594 return DAG.getFrameIndex(FI, VT);
10595}
10596
10597SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
10598 SelectionDAG &DAG) const {
10599 SDLoc DL(Op);
10600 MakeLibCallOptions CallOptions;
10601 MVT SVT = Op.getOperand(0).getSimpleValueType();
10602 RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
10603 SDValue Res =
10604 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
10605 return DAG.getBitcast(MVT::i32, Res);
10606}
10607
10608 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10609 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10610 switch (Op.getOpcode()) {
10611 default: llvm_unreachable("Don't know how to custom lower this!");
10612 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10613 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10614 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10615 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10616 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10617 case ISD::SELECT: return LowerSELECT(Op, DAG);
10618 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10619 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10620 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10621 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10622 case ISD::VASTART: return LowerVASTART(Op, DAG);
10623 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10624 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10625 case ISD::SINT_TO_FP:
10626 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10629 case ISD::FP_TO_SINT:
10630 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10631 case ISD::FP_TO_SINT_SAT:
10632 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10633 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10634 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10635 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10636 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10637 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10638 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10639 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10640 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10641 Subtarget);
10642 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10643 case ISD::SHL:
10644 case ISD::SRL:
10645 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10646 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10647 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10648 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10649 case ISD::SRL_PARTS:
10650 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10651 case ISD::CTTZ:
10652 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10653 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10654 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10655 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10656 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10657 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10658 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10659 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10660 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10661 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10662 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10663 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10664 case ISD::SIGN_EXTEND:
10665 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10666 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10667 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10668 case ISD::SET_FPMODE:
10669 return LowerSET_FPMODE(Op, DAG);
10670 case ISD::RESET_FPMODE:
10671 return LowerRESET_FPMODE(Op, DAG);
10672 case ISD::MUL: return LowerMUL(Op, DAG);
10673 case ISD::SDIV:
10674 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10675 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10676 return LowerSDIV(Op, DAG, Subtarget);
10677 case ISD::UDIV:
10678 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10679 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10680 return LowerUDIV(Op, DAG, Subtarget);
10681 case ISD::UADDO_CARRY:
10682 case ISD::USUBO_CARRY:
10683 return LowerUADDSUBO_CARRY(Op, DAG);
10684 case ISD::SADDO:
10685 case ISD::SSUBO:
10686 return LowerSignedALUO(Op, DAG);
10687 case ISD::UADDO:
10688 case ISD::USUBO:
10689 return LowerUnsignedALUO(Op, DAG);
10690 case ISD::SADDSAT:
10691 case ISD::SSUBSAT:
10692 case ISD::UADDSAT:
10693 case ISD::USUBSAT:
10694 return LowerADDSUBSAT(Op, DAG, Subtarget);
10695 case ISD::LOAD:
10696 return LowerPredicateLoad(Op, DAG);
10697 case ISD::STORE:
10698 return LowerSTORE(Op, DAG, Subtarget);
10699 case ISD::MLOAD:
10700 return LowerMLOAD(Op, DAG);
10701 case ISD::VECREDUCE_MUL:
10702 case ISD::VECREDUCE_AND:
10703 case ISD::VECREDUCE_OR:
10704 case ISD::VECREDUCE_XOR:
10705 return LowerVecReduce(Op, DAG, Subtarget);
10706 case ISD::VECREDUCE_FADD:
10707 case ISD::VECREDUCE_FMUL:
10708 case ISD::VECREDUCE_FMIN:
10709 case ISD::VECREDUCE_FMAX:
10710 return LowerVecReduceF(Op, DAG, Subtarget);
10711 case ISD::VECREDUCE_UMIN:
10712 case ISD::VECREDUCE_UMAX:
10713 case ISD::VECREDUCE_SMIN:
10714 case ISD::VECREDUCE_SMAX:
10715 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10716 case ISD::ATOMIC_LOAD:
10717 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10718 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10719 case ISD::SDIVREM:
10720 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10721 case ISD::DYNAMIC_STACKALLOC:
10722 if (Subtarget->isTargetWindows())
10723 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10724 llvm_unreachable("Don't know how to custom lower this!");
10725 case ISD::STRICT_FP_ROUND:
10726 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10727 case ISD::STRICT_FP_EXTEND:
10728 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10729 case ISD::STRICT_FSETCC:
10730 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10731 case ISD::SPONENTRY:
10732 return LowerSPONENTRY(Op, DAG);
10733 case ISD::FP_TO_BF16:
10734 return LowerFP_TO_BF16(Op, DAG);
10735 case ARMISD::WIN__DBZCHK: return SDValue();
10736 }
10737}
10738
10739 static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10740 SelectionDAG &DAG) {
10741 unsigned IntNo = N->getConstantOperandVal(0);
10742 unsigned Opc = 0;
10743 if (IntNo == Intrinsic::arm_smlald)
10744 Opc = ARMISD::SMLALD;
10745 else if (IntNo == Intrinsic::arm_smlaldx)
10746 Opc = ARMISD::SMLALDX;
10747 else if (IntNo == Intrinsic::arm_smlsld)
10748 Opc = ARMISD::SMLSLD;
10749 else if (IntNo == Intrinsic::arm_smlsldx)
10750 Opc = ARMISD::SMLSLDX;
10751 else
10752 return;
10753
10754 SDLoc dl(N);
10755 SDValue Lo, Hi;
10756 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10757
10758 SDValue LongMul = DAG.getNode(Opc, dl,
10759 DAG.getVTList(MVT::i32, MVT::i32),
10760 N->getOperand(1), N->getOperand(2),
10761 Lo, Hi);
10762 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10763 LongMul.getValue(0), LongMul.getValue(1)));
10764}
10765
10766/// ReplaceNodeResults - Replace the results of node with an illegal result
10767/// type with new values built out of custom code.
10768 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10769 SmallVectorImpl<SDValue> &Results,
10770 SelectionDAG &DAG) const {
10771 SDValue Res;
10772 switch (N->getOpcode()) {
10773 default:
10774 llvm_unreachable("Don't know how to custom expand this!");
10775 case ISD::READ_REGISTER:
10776 ExpandREAD_REGISTER(N, Results, DAG);
10777 break;
10778 case ISD::BITCAST:
10779 Res = ExpandBITCAST(N, DAG, Subtarget);
10780 break;
10781 case ISD::SRL:
10782 case ISD::SRA:
10783 case ISD::SHL:
10784 Res = Expand64BitShift(N, DAG, Subtarget);
10785 break;
10786 case ISD::SREM:
10787 case ISD::UREM:
10788 Res = LowerREM(N, DAG);
10789 break;
10790 case ISD::SDIVREM:
10791 case ISD::UDIVREM:
10792 Res = LowerDivRem(SDValue(N, 0), DAG);
10793 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10794 Results.push_back(Res.getValue(0));
10795 Results.push_back(Res.getValue(1));
10796 return;
10797 case ISD::SADDSAT:
10798 case ISD::SSUBSAT:
10799 case ISD::UADDSAT:
10800 case ISD::USUBSAT:
10801 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10802 break;
10803 case ISD::READCYCLECOUNTER:
10804 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10805 return;
10806 case ISD::UDIV:
10807 case ISD::SDIV:
10808 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10809 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10810 Results);
10811 case ISD::ATOMIC_CMP_SWAP:
10812 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10813 return;
10814 case ISD::INTRINSIC_WO_CHAIN:
10815 return ReplaceLongIntrinsic(N, Results, DAG);
10816 case ISD::LOAD:
10817 LowerLOAD(N, Results, DAG);
10818 break;
10819 case ISD::TRUNCATE:
10820 Res = LowerTruncate(N, DAG, Subtarget);
10821 break;
10822 case ISD::SIGN_EXTEND:
10823 case ISD::ZERO_EXTEND:
10824 Res = LowerVectorExtend(N, DAG, Subtarget);
10825 break;
10826 case ISD::FP_TO_SINT_SAT:
10827 case ISD::FP_TO_UINT_SAT:
10828 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10829 break;
10830 }
10831 if (Res.getNode())
10832 Results.push_back(Res);
10833}
10834
10835//===----------------------------------------------------------------------===//
10836// ARM Scheduler Hooks
10837//===----------------------------------------------------------------------===//
10838
10839/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10840/// registers the function context.
10841void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10842 MachineBasicBlock *MBB,
10843 MachineBasicBlock *DispatchBB,
10844 int FI) const {
10845 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10846 "ROPI/RWPI not currently supported with SjLj");
10847 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10848 DebugLoc dl = MI.getDebugLoc();
10849 MachineFunction *MF = MBB->getParent();
10850 MachineRegisterInfo *MRI = &MF->getRegInfo();
10851 MachineConstantPool *MCP = MF->getConstantPool();
10852 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10853 const Function &F = MF->getFunction();
10854
10855 bool isThumb = Subtarget->isThumb();
10856 bool isThumb2 = Subtarget->isThumb2();
10857
10858 unsigned PCLabelId = AFI->createPICLabelUId();
10859 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10860 ARMConstantPoolValue *CPV =
10861 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10862 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10863
10864 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10865 : &ARM::GPRRegClass;
10866
10867 // Grab constant pool and fixed stack memory operands.
10868 MachineMemOperand *CPMMO =
10869 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10870 MachineMemOperand::MOLoad, 4, Align(4));
10871
10872 MachineMemOperand *FIMMOSt =
10873 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10874 MachineMemOperand::MOStore, 4, Align(4));
10875
10876 // Load the address of the dispatch MBB into the jump buffer.
10877 if (isThumb2) {
10878 // Incoming value: jbuf
10879 // ldr.n r5, LCPI1_1
10880 // orr r5, r5, #1
10881 // add r5, pc
10882 // str r5, [$jbuf, #+4] ; &jbuf[1]
10883 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10884 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10886 .addMemOperand(CPMMO)
10888 // Set the low bit because of thumb mode.
10889 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10890 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10891 .addReg(NewVReg1, RegState::Kill)
10892 .addImm(0x01)
10894 .add(condCodeOp());
10895 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10896 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10897 .addReg(NewVReg2, RegState::Kill)
10898 .addImm(PCLabelId);
10899 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10900 .addReg(NewVReg3, RegState::Kill)
10901 .addFrameIndex(FI)
10902 .addImm(36) // &jbuf[1] :: pc
10903 .addMemOperand(FIMMOSt)
10905 } else if (isThumb) {
10906 // Incoming value: jbuf
10907 // ldr.n r1, LCPI1_4
10908 // add r1, pc
10909 // mov r2, #1
10910 // orrs r1, r2
10911 // add r2, $jbuf, #+4 ; &jbuf[1]
10912 // str r1, [r2]
10913 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10914 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10916 .addMemOperand(CPMMO)
10918 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10919 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10920 .addReg(NewVReg1, RegState::Kill)
10921 .addImm(PCLabelId);
10922 // Set the low bit because of thumb mode.
10923 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10924 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10925 .addReg(ARM::CPSR, RegState::Define)
10926 .addImm(1)
10928 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10929 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10930 .addReg(ARM::CPSR, RegState::Define)
10931 .addReg(NewVReg2, RegState::Kill)
10932 .addReg(NewVReg3, RegState::Kill)
10934 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10935 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10936 .addFrameIndex(FI)
10937 .addImm(36); // &jbuf[1] :: pc
10938 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10939 .addReg(NewVReg4, RegState::Kill)
10940 .addReg(NewVReg5, RegState::Kill)
10941 .addImm(0)
10942 .addMemOperand(FIMMOSt)
10944 } else {
10945 // Incoming value: jbuf
10946 // ldr r1, LCPI1_1
10947 // add r1, pc, r1
10948 // str r1, [$jbuf, #+4] ; &jbuf[1]
10949 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10950 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10952 .addImm(0)
10953 .addMemOperand(CPMMO)
10955 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10956 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10957 .addReg(NewVReg1, RegState::Kill)
10958 .addImm(PCLabelId)
10960 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10961 .addReg(NewVReg2, RegState::Kill)
10962 .addFrameIndex(FI)
10963 .addImm(36) // &jbuf[1] :: pc
10964 .addMemOperand(FIMMOSt)
10966 }
10967}
10968
10969void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10970 MachineBasicBlock *MBB) const {
10971 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10972 DebugLoc dl = MI.getDebugLoc();
10973 MachineFunction *MF = MBB->getParent();
10975 MachineFrameInfo &MFI = MF->getFrameInfo();
10976 int FI = MFI.getFunctionContextIndex();
10977
10978 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10979 : &ARM::GPRnopcRegClass;
10980
10981 // Get a mapping of the call site numbers to all of the landing pads they're
10982 // associated with.
10984 unsigned MaxCSNum = 0;
10985 for (MachineBasicBlock &BB : *MF) {
10986 if (!BB.isEHPad())
10987 continue;
10988
10989 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10990 // pad.
10991 for (MachineInstr &II : BB) {
10992 if (!II.isEHLabel())
10993 continue;
10994
10995 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10996 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10997
10998 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10999 for (unsigned Idx : CallSiteIdxs) {
11000 CallSiteNumToLPad[Idx].push_back(&BB);
11001 MaxCSNum = std::max(MaxCSNum, Idx);
11002 }
11003 break;
11004 }
11005 }
11006
11007 // Get an ordered list of the machine basic blocks for the jump table.
11008 std::vector<MachineBasicBlock*> LPadList;
11010 LPadList.reserve(CallSiteNumToLPad.size());
11011 for (unsigned I = 1; I <= MaxCSNum; ++I) {
11012 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
11013 for (MachineBasicBlock *MBB : MBBList) {
11014 LPadList.push_back(MBB);
11015 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
11016 }
11017 }
11018
11019 assert(!LPadList.empty() &&
11020 "No landing pad destinations for the dispatch jump table!");
11021
11022 // Create the jump table and associated information.
11024 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
11025 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
11026
11027 // Create the MBBs for the dispatch code.
11028
11029 // Shove the dispatch's address into the return slot in the function context.
11030 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
11031 DispatchBB->setIsEHPad();
11032
11033 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11034 unsigned trap_opcode;
11035 if (Subtarget->isThumb())
11036 trap_opcode = ARM::tTRAP;
11037 else
11038 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
11039
11040 BuildMI(TrapBB, dl, TII->get(trap_opcode));
11041 DispatchBB->addSuccessor(TrapBB);
11042
11043 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11044 DispatchBB->addSuccessor(DispContBB);
11045
11046 // Insert the new MBBs into the function.
11047 MF->insert(MF->end(), DispatchBB);
11048 MF->insert(MF->end(), DispContBB);
11049 MF->insert(MF->end(), TrapBB);
11050
11051 // Insert code into the entry block that creates and registers the function
11052 // context.
11053 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11054
11055 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11058
11059 MachineInstrBuilder MIB;
11060 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11061
11062 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11063 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11064
11065 // Add a register mask with no preserved registers. This results in all
11066 // registers being marked as clobbered. This can't work if the dispatch block
11067 // is in a Thumb1 function and is linked with ARM code which uses the FP
11068 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11070
11071 bool IsPositionIndependent = isPositionIndependent();
11072 unsigned NumLPads = LPadList.size();
11073 if (Subtarget->isThumb2()) {
11074 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11075 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11076 .addFrameIndex(FI)
11077 .addImm(4)
11078 .addMemOperand(FIMMOLd)
11080
11081 if (NumLPads < 256) {
11082 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11083 .addReg(NewVReg1)
11084 .addImm(LPadList.size())
11086 } else {
11087 Register VReg1 = MRI->createVirtualRegister(TRC);
11088 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11089 .addImm(NumLPads & 0xFFFF)
11091
11092 unsigned VReg2 = VReg1;
11093 if ((NumLPads & 0xFFFF0000) != 0) {
11094 VReg2 = MRI->createVirtualRegister(TRC);
11095 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11096 .addReg(VReg1)
11097 .addImm(NumLPads >> 16)
11099 }
11100
11101 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11102 .addReg(NewVReg1)
11103 .addReg(VReg2)
11105 }
11106
11107 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11108 .addMBB(TrapBB)
11110 .addReg(ARM::CPSR);
11111
11112 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11113 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11114 .addJumpTableIndex(MJTI)
11116
11117 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11118 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11119 .addReg(NewVReg3, RegState::Kill)
11120 .addReg(NewVReg1)
11123 .add(condCodeOp());
11124
11125 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11126 .addReg(NewVReg4, RegState::Kill)
11127 .addReg(NewVReg1)
11128 .addJumpTableIndex(MJTI);
11129 } else if (Subtarget->isThumb()) {
11130 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11131 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11132 .addFrameIndex(FI)
11133 .addImm(1)
11134 .addMemOperand(FIMMOLd)
11136
11137 if (NumLPads < 256) {
11138 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11139 .addReg(NewVReg1)
11140 .addImm(NumLPads)
11142 } else {
11143 MachineConstantPool *ConstantPool = MF->getConstantPool();
11144 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11145 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11146
11147 // MachineConstantPool wants an explicit alignment.
11148 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11149 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11150
11151 Register VReg1 = MRI->createVirtualRegister(TRC);
11152 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11153 .addReg(VReg1, RegState::Define)
11156 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11157 .addReg(NewVReg1)
11158 .addReg(VReg1)
11160 }
11161
11162 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11163 .addMBB(TrapBB)
11165 .addReg(ARM::CPSR);
11166
11167 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11168 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11169 .addReg(ARM::CPSR, RegState::Define)
11170 .addReg(NewVReg1)
11171 .addImm(2)
11173
11174 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11175 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11176 .addJumpTableIndex(MJTI)
11178
11179 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11180 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11181 .addReg(ARM::CPSR, RegState::Define)
11182 .addReg(NewVReg2, RegState::Kill)
11183 .addReg(NewVReg3)
11185
11186 MachineMemOperand *JTMMOLd =
11187 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11189
11190 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11191 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11192 .addReg(NewVReg4, RegState::Kill)
11193 .addImm(0)
11194 .addMemOperand(JTMMOLd)
11196
11197 unsigned NewVReg6 = NewVReg5;
11198 if (IsPositionIndependent) {
11199 NewVReg6 = MRI->createVirtualRegister(TRC);
11200 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11201 .addReg(ARM::CPSR, RegState::Define)
11202 .addReg(NewVReg5, RegState::Kill)
11203 .addReg(NewVReg3)
11205 }
11206
11207 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11208 .addReg(NewVReg6, RegState::Kill)
11209 .addJumpTableIndex(MJTI);
11210 } else {
11211 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11212 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11213 .addFrameIndex(FI)
11214 .addImm(4)
11215 .addMemOperand(FIMMOLd)
11217
11218 if (NumLPads < 256) {
11219 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11220 .addReg(NewVReg1)
11221 .addImm(NumLPads)
11223 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11224 Register VReg1 = MRI->createVirtualRegister(TRC);
11225 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11226 .addImm(NumLPads & 0xFFFF)
11228
11229 unsigned VReg2 = VReg1;
11230 if ((NumLPads & 0xFFFF0000) != 0) {
11231 VReg2 = MRI->createVirtualRegister(TRC);
11232 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11233 .addReg(VReg1)
11234 .addImm(NumLPads >> 16)
11236 }
11237
11238 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11239 .addReg(NewVReg1)
11240 .addReg(VReg2)
11242 } else {
11243 MachineConstantPool *ConstantPool = MF->getConstantPool();
11244 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11245 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11246
11247 // MachineConstantPool wants an explicit alignment.
11248 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11249 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11250
11251 Register VReg1 = MRI->createVirtualRegister(TRC);
11252 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11253 .addReg(VReg1, RegState::Define)
11255 .addImm(0)
11257 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11258 .addReg(NewVReg1)
11259 .addReg(VReg1, RegState::Kill)
11261 }
11262
11263 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11264 .addMBB(TrapBB)
11266 .addReg(ARM::CPSR);
11267
11268 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11269 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11270 .addReg(NewVReg1)
11273 .add(condCodeOp());
11274 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11275 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11276 .addJumpTableIndex(MJTI)
11278
11279 MachineMemOperand *JTMMOLd =
11280 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11282 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11283 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11284 .addReg(NewVReg3, RegState::Kill)
11285 .addReg(NewVReg4)
11286 .addImm(0)
11287 .addMemOperand(JTMMOLd)
11289
11290 if (IsPositionIndependent) {
11291 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11292 .addReg(NewVReg5, RegState::Kill)
11293 .addReg(NewVReg4)
11294 .addJumpTableIndex(MJTI);
11295 } else {
11296 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11297 .addReg(NewVReg5, RegState::Kill)
11298 .addJumpTableIndex(MJTI);
11299 }
11300 }
11301
11302 // Add the jump table entries as successors to the MBB.
11303 SmallPtrSet<MachineBasicBlock*, 64> SeenMBBs;
11304 for (MachineBasicBlock *CurMBB : LPadList) {
11305 if (SeenMBBs.insert(CurMBB).second)
11306 DispContBB->addSuccessor(CurMBB);
11307 }
11308
11309 // N.B. the order the invoke BBs are processed in doesn't matter here.
11310 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11311 SmallVector<MachineBasicBlock*, 64> MBBLPads;
11312 for (MachineBasicBlock *BB : InvokeBBs) {
11313
11314 // Remove the landing pad successor from the invoke block and replace it
11315 // with the new dispatch block.
11316 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11317 while (!Successors.empty()) {
11318 MachineBasicBlock *SMBB = Successors.pop_back_val();
11319 if (SMBB->isEHPad()) {
11320 BB->removeSuccessor(SMBB);
11321 MBBLPads.push_back(SMBB);
11322 }
11323 }
11324
11325 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11326 BB->normalizeSuccProbs();
11327
11328 // Find the invoke call and mark all of the callee-saved registers as
11329 // 'implicit defined' so that they're spilled. This prevents code from
11330 // moving instructions to before the EH block, where they will never be
11331 // executed.
11332 for (MachineBasicBlock::reverse_iterator
11333 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11334 if (!II->isCall()) continue;
11335
11336 DenseMap<unsigned, bool> DefRegs;
11337 for (MachineInstr::mop_iterator
11338 OI = II->operands_begin(), OE = II->operands_end();
11339 OI != OE; ++OI) {
11340 if (!OI->isReg()) continue;
11341 DefRegs[OI->getReg()] = true;
11342 }
11343
11344 MachineInstrBuilder MIB(*MF, &*II);
11345
11346 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11347 unsigned Reg = SavedRegs[i];
11348 if (Subtarget->isThumb2() &&
11349 !ARM::tGPRRegClass.contains(Reg) &&
11350 !ARM::hGPRRegClass.contains(Reg))
11351 continue;
11352 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11353 continue;
11354 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11355 continue;
11356 if (!DefRegs[Reg])
11357 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11358 }
11359
11360 break;
11361 }
11362 }
11363
11364 // Mark all former landing pads as non-landing pads. The dispatch is the only
11365 // landing pad now.
11366 for (MachineBasicBlock *MBBLPad : MBBLPads)
11367 MBBLPad->setIsEHPad(false);
11368
11369 // The instruction is gone now.
11370 MI.eraseFromParent();
11371}
11372
11373static
11374 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11375 for (MachineBasicBlock *S : MBB->successors())
11376 if (S != Succ)
11377 return S;
11378 llvm_unreachable("Expecting a BB with two successors!");
11379}
11380
11381/// Return the load opcode for a given load size. If load size >= 8,
11382 /// a NEON opcode will be returned.
11383static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11384 if (LdSize >= 8)
11385 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11386 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11387 if (IsThumb1)
11388 return LdSize == 4 ? ARM::tLDRi
11389 : LdSize == 2 ? ARM::tLDRHi
11390 : LdSize == 1 ? ARM::tLDRBi : 0;
11391 if (IsThumb2)
11392 return LdSize == 4 ? ARM::t2LDR_POST
11393 : LdSize == 2 ? ARM::t2LDRH_POST
11394 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11395 return LdSize == 4 ? ARM::LDR_POST_IMM
11396 : LdSize == 2 ? ARM::LDRH_POST
11397 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11398}
11399
11400/// Return the store opcode for a given store size. If store size >= 8,
11401 /// a NEON opcode will be returned.
11402static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11403 if (StSize >= 8)
11404 return StSize == 16 ? ARM::VST1q32wb_fixed
11405 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11406 if (IsThumb1)
11407 return StSize == 4 ? ARM::tSTRi
11408 : StSize == 2 ? ARM::tSTRHi
11409 : StSize == 1 ? ARM::tSTRBi : 0;
11410 if (IsThumb2)
11411 return StSize == 4 ? ARM::t2STR_POST
11412 : StSize == 2 ? ARM::t2STRH_POST
11413 : StSize == 1 ? ARM::t2STRB_POST : 0;
11414 return StSize == 4 ? ARM::STR_POST_IMM
11415 : StSize == 2 ? ARM::STRH_POST
11416 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11417}
11418
11419/// Emit a post-increment load operation with given size. The instructions
11420/// will be added to BB at Pos.
11421 static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11422 const TargetInstrInfo *TII, const DebugLoc &dl,
11423 unsigned LdSize, unsigned Data, unsigned AddrIn,
11424 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11425 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11426 assert(LdOpc != 0 && "Should have a load opcode");
11427 if (LdSize >= 8) {
11428 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11429 .addReg(AddrOut, RegState::Define)
11430 .addReg(AddrIn)
11431 .addImm(0)
11433 } else if (IsThumb1) {
11434 // load + update AddrIn
11435 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11436 .addReg(AddrIn)
11437 .addImm(0)
11439 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11440 .add(t1CondCodeOp())
11441 .addReg(AddrIn)
11442 .addImm(LdSize)
11444 } else if (IsThumb2) {
11445 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11446 .addReg(AddrOut, RegState::Define)
11447 .addReg(AddrIn)
11448 .addImm(LdSize)
11450 } else { // arm
11451 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11452 .addReg(AddrOut, RegState::Define)
11453 .addReg(AddrIn)
11454 .addReg(0)
11455 .addImm(LdSize)
11457 }
11458}
11459
11460/// Emit a post-increment store operation with given size. The instructions
11461 /// will be added to BB at Pos.
11462 static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11463 const TargetInstrInfo *TII, const DebugLoc &dl,
11464 unsigned StSize, unsigned Data, unsigned AddrIn,
11465 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11466 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11467 assert(StOpc != 0 && "Should have a store opcode");
11468 if (StSize >= 8) {
11469 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11470 .addReg(AddrIn)
11471 .addImm(0)
11472 .addReg(Data)
11473 .add(predOps(ARMCC::AL));
11474 } else if (IsThumb1) {
11475 // store + update AddrIn
11476 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11477 .addReg(Data)
11478 .addReg(AddrIn)
11479 .addImm(0)
11480 .add(predOps(ARMCC::AL));
11481 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11482 .add(t1CondCodeOp())
11483 .addReg(AddrIn)
11484 .addImm(StSize)
11485 .add(predOps(ARMCC::AL));
11486 } else if (IsThumb2) {
11487 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11488 .addReg(Data)
11489 .addReg(AddrIn)
11490 .addImm(StSize)
11491 .add(predOps(ARMCC::AL));
11492 } else { // arm
11493 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11494 .addReg(Data)
11495 .addReg(AddrIn)
11496 .addReg(0)
11497 .addImm(StSize)
11498 .add(predOps(ARMCC::AL));
11499 }
11500 }
11501
11502 MachineBasicBlock *
11503 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11504 MachineBasicBlock *BB) const {
11505 // This pseudo instruction has 4 operands: dst, src, size, alignment
11506 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11507 // Otherwise, we will generate unrolled scalar copies.
11508 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11509 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11510 MachineFunction::iterator It = ++BB->getIterator();
11511 
11512 Register dest = MI.getOperand(0).getReg();
11513 Register src = MI.getOperand(1).getReg();
11514 unsigned SizeVal = MI.getOperand(2).getImm();
11515 unsigned Alignment = MI.getOperand(3).getImm();
11516 DebugLoc dl = MI.getDebugLoc();
11517
11518 MachineFunction *MF = BB->getParent();
11519 MachineRegisterInfo &MRI = MF->getRegInfo();
11520 unsigned UnitSize = 0;
11521 const TargetRegisterClass *TRC = nullptr;
11522 const TargetRegisterClass *VecTRC = nullptr;
11523
11524 bool IsThumb1 = Subtarget->isThumb1Only();
11525 bool IsThumb2 = Subtarget->isThumb2();
11526 bool IsThumb = Subtarget->isThumb();
11527
11528 if (Alignment & 1) {
11529 UnitSize = 1;
11530 } else if (Alignment & 2) {
11531 UnitSize = 2;
11532 } else {
11533 // Check whether we can use NEON instructions.
11534 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11535 Subtarget->hasNEON()) {
11536 if ((Alignment % 16 == 0) && SizeVal >= 16)
11537 UnitSize = 16;
11538 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11539 UnitSize = 8;
11540 }
11541 // Can't use NEON instructions.
11542 if (UnitSize == 0)
11543 UnitSize = 4;
11544 }
11545
11546 // Select the correct opcode and register class for unit size load/store
11547 bool IsNeon = UnitSize >= 8;
11548 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11549 if (IsNeon)
11550 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11551 : UnitSize == 8 ? &ARM::DPRRegClass
11552 : nullptr;
11553
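// The copy is split into LoopSize bytes moved UnitSize at a time, followed by
// a byte-wise epilogue for the BytesLeft (< UnitSize) remainder.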
11554 unsigned BytesLeft = SizeVal % UnitSize;
11555 unsigned LoopSize = SizeVal - BytesLeft;
11556
11557 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11558 // Use LDR and STR to copy.
11559 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11560 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11561 unsigned srcIn = src;
11562 unsigned destIn = dest;
11563 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11564 Register srcOut = MRI.createVirtualRegister(TRC);
11565 Register destOut = MRI.createVirtualRegister(TRC);
11566 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11567 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11568 IsThumb1, IsThumb2);
11569 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11570 IsThumb1, IsThumb2);
11571 srcIn = srcOut;
11572 destIn = destOut;
11573 }
11574
11575 // Handle the leftover bytes with LDRB and STRB.
11576 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11577 // [destOut] = STRB_POST(scratch, destIn, 1)
11578 for (unsigned i = 0; i < BytesLeft; i++) {
11579 Register srcOut = MRI.createVirtualRegister(TRC);
11580 Register destOut = MRI.createVirtualRegister(TRC);
11581 Register scratch = MRI.createVirtualRegister(TRC);
11582 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11583 IsThumb1, IsThumb2);
11584 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11585 IsThumb1, IsThumb2);
11586 srcIn = srcOut;
11587 destIn = destOut;
11588 }
11589 MI.eraseFromParent(); // The instruction is gone now.
11590 return BB;
11591 }
11592
11593 // Expand the pseudo op to a loop.
11594 // thisMBB:
11595 // ...
11596 // movw varEnd, # --> with thumb2
11597 // movt varEnd, #
11598 // ldrcp varEnd, idx --> without thumb2
11599 // fallthrough --> loopMBB
11600 // loopMBB:
11601 // PHI varPhi, varEnd, varLoop
11602 // PHI srcPhi, src, srcLoop
11603 // PHI destPhi, dst, destLoop
11604 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11605 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11606 // subs varLoop, varPhi, #UnitSize
11607 // bne loopMBB
11608 // fallthrough --> exitMBB
11609 // exitMBB:
11610 // epilogue to handle left-over bytes
11611 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11612 // [destOut] = STRB_POST(scratch, destLoop, 1)
11613 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11614 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11615 MF->insert(It, loopMBB);
11616 MF->insert(It, exitMBB);
11617
11618 // Set the call frame size on entry to the new basic blocks.
11619 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11620 loopMBB->setCallFrameSize(CallFrameSize);
11621 exitMBB->setCallFrameSize(CallFrameSize);
11622
11623 // Transfer the remainder of BB and its successor edges to exitMBB.
11624 exitMBB->splice(exitMBB->begin(), BB,
11625 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11626 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11627 
11628 // Load an immediate to varEnd.
11629 Register varEnd = MRI.createVirtualRegister(TRC);
11630 if (Subtarget->useMovt()) {
11631 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11632 varEnd)
11633 .addImm(LoopSize);
11634 } else if (Subtarget->genExecuteOnly()) {
11635 assert(IsThumb && "Non-thumb expected to have used movt");
11636 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11637 } else {
11638 MachineConstantPool *ConstantPool = MF->getConstantPool();
11639 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11640 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11641 
11642 // MachineConstantPool wants an explicit alignment.
11643 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11644 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11645 MachineMemOperand *CPMMO =
11646 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11647 MachineMemOperand::MOLoad, 4, Align(4));
11648 
11649 if (IsThumb)
11650 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11651 .addReg(varEnd, RegState::Define)
11652 .addConstantPoolIndex(Idx)
11653 .add(predOps(ARMCC::AL))
11654 .addMemOperand(CPMMO);
11655 else
11656 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11657 .addReg(varEnd, RegState::Define)
11658 .addConstantPoolIndex(Idx)
11659 .addImm(0)
11660 .add(predOps(ARMCC::AL))
11661 .addMemOperand(CPMMO);
11662 }
11663 BB->addSuccessor(loopMBB);
11664
11665 // Generate the loop body:
11666 // varPhi = PHI(varLoop, varEnd)
11667 // srcPhi = PHI(srcLoop, src)
11668 // destPhi = PHI(destLoop, dst)
11669 MachineBasicBlock *entryBB = BB;
11670 BB = loopMBB;
11671 Register varLoop = MRI.createVirtualRegister(TRC);
11672 Register varPhi = MRI.createVirtualRegister(TRC);
11673 Register srcLoop = MRI.createVirtualRegister(TRC);
11674 Register srcPhi = MRI.createVirtualRegister(TRC);
11675 Register destLoop = MRI.createVirtualRegister(TRC);
11676 Register destPhi = MRI.createVirtualRegister(TRC);
11677
11678 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11679 .addReg(varLoop).addMBB(loopMBB)
11680 .addReg(varEnd).addMBB(entryBB);
11681 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11682 .addReg(srcLoop).addMBB(loopMBB)
11683 .addReg(src).addMBB(entryBB);
11684 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11685 .addReg(destLoop).addMBB(loopMBB)
11686 .addReg(dest).addMBB(entryBB);
11687
11688 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11689 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11690 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11691 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11692 IsThumb1, IsThumb2);
11693 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11694 IsThumb1, IsThumb2);
11695
11696 // Decrement loop variable by UnitSize.
11697 if (IsThumb1) {
11698 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11699 .add(t1CondCodeOp())
11700 .addReg(varPhi)
11701 .addImm(UnitSize)
11702 .add(predOps(ARMCC::AL));
11703 } else {
11704 MachineInstrBuilder MIB =
11705 BuildMI(*BB, BB->end(), dl,
11706 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11707 MIB.addReg(varPhi)
11708 .addImm(UnitSize)
11709 .add(predOps(ARMCC::AL))
11710 .add(condCodeOp());
11711 MIB->getOperand(5).setReg(ARM::CPSR);
11712 MIB->getOperand(5).setIsDef(true);
11713 }
11714 BuildMI(*BB, BB->end(), dl,
11715 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11716 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11717
11718 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11719 BB->addSuccessor(loopMBB);
11720 BB->addSuccessor(exitMBB);
11721
11722 // Add epilogue to handle BytesLeft.
11723 BB = exitMBB;
11724 auto StartOfExit = exitMBB->begin();
11725
11726 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11727 // [destOut] = STRB_POST(scratch, destLoop, 1)
11728 unsigned srcIn = srcLoop;
11729 unsigned destIn = destLoop;
11730 for (unsigned i = 0; i < BytesLeft; i++) {
11731 Register srcOut = MRI.createVirtualRegister(TRC);
11732 Register destOut = MRI.createVirtualRegister(TRC);
11733 Register scratch = MRI.createVirtualRegister(TRC);
11734 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11735 IsThumb1, IsThumb2);
11736 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11737 IsThumb1, IsThumb2);
11738 srcIn = srcOut;
11739 destIn = destOut;
11740 }
11741
11742 MI.eraseFromParent(); // The instruction is gone now.
11743 return BB;
11744}
11745
11746 MachineBasicBlock *
11747 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11748 MachineBasicBlock *MBB) const {
11749 const TargetMachine &TM = getTargetMachine();
11750 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11751 DebugLoc DL = MI.getDebugLoc();
11752
11753 assert(Subtarget->isTargetWindows() &&
11754 "__chkstk is only supported on Windows");
11755 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11756
11757 // __chkstk takes the number of words to allocate on the stack in R4, and
11758 // returns the stack adjustment in number of bytes in R4. This will not
11759 // clobber any other registers (other than the obvious lr).
11760 //
11761 // Although, technically, IP should be considered a register which may be
11762 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11763 // thumb-2 environment, so there is no interworking required. As a result, we
11764 // do not expect a veneer to be emitted by the linker, clobbering IP.
11765 //
11766 // Each module receives its own copy of __chkstk, so no import thunk is
11767 // required, again, ensuring that IP is not clobbered.
11768 //
11769 // Finally, although some linkers may theoretically provide a trampoline for
11770 // out of range calls (which is quite common due to a 32M range limitation of
11771 // branches for Thumb), we can generate the long-call version via
11772 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11773 // IP.
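// For example, probing 4096 bytes means R4 holds 1024 (words) on entry to the
// call and 4096 (bytes) on return, which the final SUB below applies to SP.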
11774
11775 switch (TM.getCodeModel()) {
11776 case CodeModel::Tiny:
11777 llvm_unreachable("Tiny code model not available on ARM.");
11778 case CodeModel::Small:
11779 case CodeModel::Medium:
11780 case CodeModel::Kernel:
11781 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11782 .add(predOps(ARMCC::AL))
11783 .addExternalSymbol("__chkstk")
11784 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11785 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11786 .addReg(ARM::R12,
11787 RegState::Implicit | RegState::Define | RegState::Dead)
11788 .addReg(ARM::CPSR,
11789 RegState::Implicit | RegState::Define | RegState::Dead);
11790 break;
11791 case CodeModel::Large: {
11792 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11793 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11794 
11795 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11796 .addExternalSymbol("__chkstk");
11797 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11798 .add(predOps(ARMCC::AL))
11799 .addReg(Reg, RegState::Kill)
11800 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11801 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11802 .addReg(ARM::R12,
11803 RegState::Implicit | RegState::Define | RegState::Dead)
11804 .addReg(ARM::CPSR,
11805 RegState::Implicit | RegState::Define | RegState::Dead);
11806 break;
11807 }
11808 }
11809 
11810 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11811 .addReg(ARM::SP, RegState::Kill)
11812 .addReg(ARM::R4, RegState::Kill)
11813 .setMIFlags(MachineInstr::FrameSetup)
11814 .add(predOps(ARMCC::AL))
11815 .add(condCodeOp());
11816
11817 MI.eraseFromParent();
11818 return MBB;
11819}
11820
11821 MachineBasicBlock *
11822 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11823 MachineBasicBlock *MBB) const {
11824 DebugLoc DL = MI.getDebugLoc();
11825 MachineFunction *MF = MBB->getParent();
11826 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11827
11828 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11829 MF->insert(++MBB->getIterator(), ContBB);
11830 ContBB->splice(ContBB->begin(), MBB,
11831 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11832 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11833 MBB->addSuccessor(ContBB);
11834 
11835 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11836 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11837 MF->push_back(TrapBB);
11838 MBB->addSuccessor(TrapBB);
11839 
11840 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11841 .addReg(MI.getOperand(0).getReg())
11842 .addImm(0)
11843 .add(predOps(ARMCC::AL));
11844 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11845 .addMBB(TrapBB)
11846 .addImm(ARMCC::EQ)
11847 .addReg(ARM::CPSR);
11848
11849 MI.eraseFromParent();
11850 return ContBB;
11851}
11852
11853// The CPSR operand of SelectItr might be missing a kill marker
11854// because there were multiple uses of CPSR, and ISel didn't know
11855// which to mark. Figure out whether SelectItr should have had a
11856// kill marker, and set it if it should. Returns the correct kill
11857// marker value.
11858 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11859 MachineBasicBlock* BB,
11860 const TargetRegisterInfo* TRI) {
11861 // Scan forward through BB for a use/def of CPSR.
11862 MachineBasicBlock::iterator miI(std::next(SelectItr));
11863 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11864 const MachineInstr& mi = *miI;
11865 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11866 return false;
11867 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11868 break; // Should have kill-flag - update below.
11869 }
11870
11871 // If we hit the end of the block, check whether CPSR is live into a
11872 // successor.
11873 if (miI == BB->end()) {
11874 for (MachineBasicBlock *Succ : BB->successors())
11875 if (Succ->isLiveIn(ARM::CPSR))
11876 return false;
11877 }
11878
11879 // We found a def, or hit the end of the basic block and CPSR wasn't live
11880 // out. SelectMI should have a kill flag on CPSR.
11881 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11882 return true;
11883}
11884
11885/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11886/// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop
11887 static Register genTPEntry(MachineBasicBlock *TpEntry,
11888 MachineBasicBlock *TpLoopBody,
11889 MachineBasicBlock *TpExit, Register OpSizeReg,
11890 const TargetInstrInfo *TII, DebugLoc Dl,
11891 MachineRegisterInfo &MRI) {
11892 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
11893 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11894 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11895 .addUse(OpSizeReg)
11896 .addImm(15)
11897 .add(predOps(ARMCC::AL))
11898 .addReg(0);
11899
11900 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11901 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11902 .addUse(AddDestReg, RegState::Kill)
11903 .addImm(4)
11904 .add(predOps(ARMCC::AL))
11905 .addReg(0);
11906
11907 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11908 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11909 .addUse(LsrDestReg, RegState::Kill);
11910
11911 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11912 .addUse(TotalIterationsReg)
11913 .addMBB(TpExit);
11914
11915 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11916 .addMBB(TpLoopBody)
11917 .add(predOps(ARMCC::AL));
11918 
11919 return TotalIterationsReg;
11920}
11921
11922/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11923/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11924/// loops.
11925static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11926 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11927 const TargetInstrInfo *TII, DebugLoc Dl,
11928 MachineRegisterInfo &MRI, Register OpSrcReg,
11929 Register OpDestReg, Register ElementCountReg,
11930 Register TotalIterationsReg, bool IsMemcpy) {
11931 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11932 // array, loop iteration counter, predication counter.
11933
11934 Register SrcPhiReg, CurrSrcReg;
11935 if (IsMemcpy) {
11936 // Current position in the src array
11937 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11938 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11939 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11940 .addUse(OpSrcReg)
11941 .addMBB(TpEntry)
11942 .addUse(CurrSrcReg)
11943 .addMBB(TpLoopBody);
11944 }
11945
11946 // Current position in the dest array
11947 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11948 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11949 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11950 .addUse(OpDestReg)
11951 .addMBB(TpEntry)
11952 .addUse(CurrDestReg)
11953 .addMBB(TpLoopBody);
11954
11955 // Current loop counter
11956 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11957 Register RemainingLoopIterationsReg =
11958 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11959 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11960 .addUse(TotalIterationsReg)
11961 .addMBB(TpEntry)
11962 .addUse(RemainingLoopIterationsReg)
11963 .addMBB(TpLoopBody);
11964
11965 // Predication counter
11966 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11967 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11968 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11969 .addUse(ElementCountReg)
11970 .addMBB(TpEntry)
11971 .addUse(RemainingElementsReg)
11972 .addMBB(TpLoopBody);
11973
11974 // Pass predication counter to VCTP
11975 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11976 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11977 .addUse(PredCounterPhiReg)
11978 .addImm(ARMVCC::None)
11979 .addReg(0)
11980 .addReg(0);
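// MVE_VCTP8 asserts one predicate lane per byte still to be processed (capped
// at 16), so the final, partial iteration only touches the leftover bytes.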
11981
11982 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11983 .addUse(PredCounterPhiReg)
11984 .addImm(16)
11985 .add(predOps(ARMCC::AL))
11986 .addReg(0);
11987
11988 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11989 Register SrcValueReg;
11990 if (IsMemcpy) {
11991 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11992 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11993 .addDef(CurrSrcReg)
11994 .addDef(SrcValueReg)
11995 .addReg(SrcPhiReg)
11996 .addImm(16)
11997 .addImm(ARMVCC::Then)
11998 .addUse(VccrReg)
11999 .addReg(0);
12000 } else
12001 SrcValueReg = OpSrcReg;
12002
12003 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
12004 .addDef(CurrDestReg)
12005 .addUse(SrcValueReg)
12006 .addReg(DestPhiReg)
12007 .addImm(16)
12008 .addImm(ARMVCC::Then)
12009 .addUse(VccrReg)
12010 .addReg(0);
12011
12012 // Add the pseudoInstrs for decrementing the loop counter and marking the
12013 // end:t2DoLoopDec and t2DoLoopEnd
12014 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
12015 .addUse(LoopCounterPhiReg)
12016 .addImm(1);
12017
12018 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
12019 .addUse(RemainingLoopIterationsReg)
12020 .addMBB(TpLoopBody);
12021
12022 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
12023 .addMBB(TpExit)
12024 .add(predOps(ARMCC::AL));
12025 }
12026
12027 MachineBasicBlock *
12028 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12029 MachineBasicBlock *BB) const {
12030 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
12031 DebugLoc dl = MI.getDebugLoc();
12032 bool isThumb2 = Subtarget->isThumb2();
12033 switch (MI.getOpcode()) {
12034 default: {
12035 MI.print(errs());
12036 llvm_unreachable("Unexpected instr type to insert");
12037 }
12038
12039 // Thumb1 post-indexed loads are really just single-register LDMs.
12040 case ARM::tLDR_postidx: {
12041 MachineOperand Def(MI.getOperand(1));
12042 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12043 .add(Def) // Rn_wb
12044 .add(MI.getOperand(2)) // Rn
12045 .add(MI.getOperand(3)) // PredImm
12046 .add(MI.getOperand(4)) // PredReg
12047 .add(MI.getOperand(0)) // Rt
12048 .cloneMemRefs(MI);
12049 MI.eraseFromParent();
12050 return BB;
12051 }
12052
12053 case ARM::MVE_MEMCPYLOOPINST:
12054 case ARM::MVE_MEMSETLOOPINST: {
12055
12056 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
12057 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
12058 // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
12059 // adds the relevant instructions in the TP loop Body for generation of a
12060 // WLSTP loop.
12061
12062 // Below is relevant portion of the CFG after the transformation.
12063 // The Machine Basic Blocks are shown along with branch conditions (in
12064 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12065 // portion of the CFG and may not necessarily be the entry/exit of the
12066 // function.
12067
12068 // (Relevant) CFG after transformation:
12069 // TP entry MBB
12070 // |
12071 // |-----------------|
12072 // (n <= 0) (n > 0)
12073 // | |
12074 // | TP loop Body MBB<--|
12075 // | | |
12076 // \ |___________|
12077 // \ /
12078 // TP exit MBB
12079
12080 MachineFunction *MF = BB->getParent();
12081 MachineFunctionProperties &Properties = MF->getProperties();
12082 MachineRegisterInfo &MRI = MF->getRegInfo();
12083 
12084 Register OpDestReg = MI.getOperand(0).getReg();
12085 Register OpSrcReg = MI.getOperand(1).getReg();
12086 Register OpSizeReg = MI.getOperand(2).getReg();
12087
12088 // Allocate the required MBBs and add to parent function.
12089 MachineBasicBlock *TpEntry = BB;
12090 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12091 MachineBasicBlock *TpExit;
12092
12093 MF->push_back(TpLoopBody);
12094
12095 // If any instructions are present in the current block after
12096 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12097 // move the instructions into the newly created exit block. If there are no
12098 // instructions add an explicit branch to the FallThrough block and then
12099 // split.
12100 //
12101 // The split is required for two reasons:
12102 // 1) A terminator(t2WhileLoopStart) will be placed at that site.
12103 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12104 // need to be updated. splitAt() already handles this.
12105 TpExit = BB->splitAt(MI, false);
12106 if (TpExit == BB) {
12107 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12108 "block containing memcpy/memset Pseudo");
12109 TpExit = BB->getFallThrough();
12110 BuildMI(BB, dl, TII->get(ARM::t2B))
12111 .addMBB(TpExit)
12112 .add(predOps(ARMCC::AL));
12113 TpExit = BB->splitAt(MI, false);
12114 }
12115
12116 // Add logic for iteration count
12117 Register TotalIterationsReg =
12118 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12119
12120 // Add the vectorized (and predicated) loads/store instructions
12121 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12122 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12123 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12124
12125 // Required to avoid conflict with the MachineVerifier during testing.
12126 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
12127 
12128 // Connect the blocks
12129 TpEntry->addSuccessor(TpLoopBody);
12130 TpLoopBody->addSuccessor(TpLoopBody);
12131 TpLoopBody->addSuccessor(TpExit);
12132
12133 // Reorder for a more natural layout
12134 TpLoopBody->moveAfter(TpEntry);
12135 TpExit->moveAfter(TpLoopBody);
12136
12137 // Finally, remove the memcpy Pseudo Instruction
12138 MI.eraseFromParent();
12139
12140 // Return the exit block as it may contain other instructions requiring a
12141 // custom inserter
12142 return TpExit;
12143 }
12144
12145 // The Thumb2 pre-indexed stores have the same MI operands, they just
12146 // define them differently in the .td files from the isel patterns, so
12147 // they need pseudos.
12148 case ARM::t2STR_preidx:
12149 MI.setDesc(TII->get(ARM::t2STR_PRE));
12150 return BB;
12151 case ARM::t2STRB_preidx:
12152 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12153 return BB;
12154 case ARM::t2STRH_preidx:
12155 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12156 return BB;
12157
12158 case ARM::STRi_preidx:
12159 case ARM::STRBi_preidx: {
12160 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12161 : ARM::STRB_PRE_IMM;
12162 // Decode the offset.
12163 unsigned Offset = MI.getOperand(4).getImm();
12164 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12165 Offset = ARM_AM::getAM2Offset(Offset);
12166 if (isSub)
12167 Offset = -Offset;
12168
12169 MachineMemOperand *MMO = *MI.memoperands_begin();
12170 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12171 .add(MI.getOperand(0)) // Rn_wb
12172 .add(MI.getOperand(1)) // Rt
12173 .add(MI.getOperand(2)) // Rn
12174 .addImm(Offset) // offset (skip GPR==zero_reg)
12175 .add(MI.getOperand(5)) // pred
12176 .add(MI.getOperand(6))
12177 .addMemOperand(MMO);
12178 MI.eraseFromParent();
12179 return BB;
12180 }
12181 case ARM::STRr_preidx:
12182 case ARM::STRBr_preidx:
12183 case ARM::STRH_preidx: {
12184 unsigned NewOpc;
12185 switch (MI.getOpcode()) {
12186 default: llvm_unreachable("unexpected opcode!");
12187 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12188 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12189 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12190 }
12191 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12192 for (const MachineOperand &MO : MI.operands())
12193 MIB.add(MO);
12194 MI.eraseFromParent();
12195 return BB;
12196 }
12197
12198 case ARM::tMOVCCr_pseudo: {
12199 // To "insert" a SELECT_CC instruction, we actually have to insert the
12200 // diamond control-flow pattern. The incoming instruction knows the
12201 // destination vreg to set, the condition code register to branch on, the
12202 // true/false values to select between, and a branch opcode to use.
12203 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12204 MachineFunction::iterator It = ++BB->getIterator();
12205 
12206 // thisMBB:
12207 // ...
12208 // TrueVal = ...
12209 // cmpTY ccX, r1, r2
12210 // bCC copy1MBB
12211 // fallthrough --> copy0MBB
12212 MachineBasicBlock *thisMBB = BB;
12213 MachineFunction *F = BB->getParent();
12214 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12215 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12216 F->insert(It, copy0MBB);
12217 F->insert(It, sinkMBB);
12218
12219 // Set the call frame size on entry to the new basic blocks.
12220 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12221 copy0MBB->setCallFrameSize(CallFrameSize);
12222 sinkMBB->setCallFrameSize(CallFrameSize);
12223
12224 // Check whether CPSR is live past the tMOVCCr_pseudo.
12225 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12226 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12227 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12228 copy0MBB->addLiveIn(ARM::CPSR);
12229 sinkMBB->addLiveIn(ARM::CPSR);
12230 }
12231
12232 // Transfer the remainder of BB and its successor edges to sinkMBB.
12233 sinkMBB->splice(sinkMBB->begin(), BB,
12234 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12235 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12236 
12237 BB->addSuccessor(copy0MBB);
12238 BB->addSuccessor(sinkMBB);
12239
12240 BuildMI(BB, dl, TII->get(ARM::tBcc))
12241 .addMBB(sinkMBB)
12242 .addImm(MI.getOperand(3).getImm())
12243 .addReg(MI.getOperand(4).getReg());
12244
12245 // copy0MBB:
12246 // %FalseValue = ...
12247 // # fallthrough to sinkMBB
12248 BB = copy0MBB;
12249
12250 // Update machine-CFG edges
12251 BB->addSuccessor(sinkMBB);
12252
12253 // sinkMBB:
12254 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12255 // ...
12256 BB = sinkMBB;
12257 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12258 .addReg(MI.getOperand(1).getReg())
12259 .addMBB(copy0MBB)
12260 .addReg(MI.getOperand(2).getReg())
12261 .addMBB(thisMBB);
12262
12263 MI.eraseFromParent(); // The pseudo instruction is gone now.
12264 return BB;
12265 }
12266
12267 case ARM::BCCi64:
12268 case ARM::BCCZi64: {
12269 // If there is an unconditional branch to the other successor, remove it.
12270 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12271
12272 // Compare both parts that make up the double comparison separately for
12273 // equality.
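// The second CMP below is itself predicated on EQ, so CPSR ends up signalling
// equality only if both 32-bit halves compared equal.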
12274 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12275
12276 Register LHS1 = MI.getOperand(1).getReg();
12277 Register LHS2 = MI.getOperand(2).getReg();
12278 if (RHSisZero) {
12279 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12280 .addReg(LHS1)
12281 .addImm(0)
12282 .add(predOps(ARMCC::AL));
12283 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12284 .addReg(LHS2).addImm(0)
12285 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12286 } else {
12287 Register RHS1 = MI.getOperand(3).getReg();
12288 Register RHS2 = MI.getOperand(4).getReg();
12289 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12290 .addReg(LHS1)
12291 .addReg(RHS1)
12292 .add(predOps(ARMCC::AL));
12293 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12294 .addReg(LHS2).addReg(RHS2)
12295 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12296 }
12297
12298 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12299 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12300 if (MI.getOperand(0).getImm() == ARMCC::NE)
12301 std::swap(destMBB, exitMBB);
12302
12303 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12304 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12305 if (isThumb2)
12306 BuildMI(BB, dl, TII->get(ARM::t2B))
12307 .addMBB(exitMBB)
12308 .add(predOps(ARMCC::AL));
12309 else
12310 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12311
12312 MI.eraseFromParent(); // The pseudo instruction is gone now.
12313 return BB;
12314 }
12315
12316 case ARM::Int_eh_sjlj_setjmp:
12317 case ARM::Int_eh_sjlj_setjmp_nofp:
12318 case ARM::tInt_eh_sjlj_setjmp:
12319 case ARM::t2Int_eh_sjlj_setjmp:
12320 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12321 return BB;
12322
12323 case ARM::Int_eh_sjlj_setup_dispatch:
12324 EmitSjLjDispatchBlock(MI, BB);
12325 return BB;
12326
12327 case ARM::ABS:
12328 case ARM::t2ABS: {
12329 // To insert an ABS instruction, we have to insert the
12330 // diamond control-flow pattern. The incoming instruction knows the
12331 // source vreg to test against 0, the destination vreg to set,
12332 // the condition code register to branch on, the
12333 // true/false values to select between, and a branch opcode to use.
12334 // It transforms
12335 // V1 = ABS V0
12336 // into
12337 // V2 = MOVS V0
12338 // BCC (branch to SinkBB if V0 >= 0)
12339 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12340 // SinkBB: V1 = PHI(V2, V3)
12341 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12342 MachineFunction::iterator BBI = ++BB->getIterator();
12343 MachineFunction *Fn = BB->getParent();
12344 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12345 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12346 Fn->insert(BBI, RSBBB);
12347 Fn->insert(BBI, SinkBB);
12348
12349 Register ABSSrcReg = MI.getOperand(1).getReg();
12350 Register ABSDstReg = MI.getOperand(0).getReg();
12351 bool ABSSrcKIll = MI.getOperand(1).isKill();
12352 bool isThumb2 = Subtarget->isThumb2();
12353 MachineRegisterInfo &MRI = Fn->getRegInfo();
12354 // In Thumb mode S must not be specified if source register is the SP or
12355 // PC and if destination register is the SP, so restrict register class
12356 Register NewRsbDstReg = MRI.createVirtualRegister(
12357 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12358
12359 // Transfer the remainder of BB and its successor edges to sinkMBB.
12360 SinkBB->splice(SinkBB->begin(), BB,
12361 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12362 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12363 
12364 BB->addSuccessor(RSBBB);
12365 BB->addSuccessor(SinkBB);
12366
12367 // fall through to SinkMBB
12368 RSBBB->addSuccessor(SinkBB);
12369
12370 // insert a cmp at the end of BB
12371 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12372 .addReg(ABSSrcReg)
12373 .addImm(0)
12374 .add(predOps(ARMCC::AL));
12375 
12376 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12377 BuildMI(BB, dl,
12378 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12379 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12380 
12381 // insert rsbri in RSBBB
12382 // Note: BCC and rsbri will be converted into predicated rsbmi
12383 // by if-conversion pass
12384 BuildMI(*RSBBB, RSBBB->begin(), dl,
12385 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12386 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12387 .addImm(0)
12388 .add(predOps(ARMCC::AL))
12389 .add(condCodeOp());
12390
12391 // insert PHI in SinkBB,
12392 // reuse ABSDstReg to not change uses of ABS instruction
12393 BuildMI(*SinkBB, SinkBB->begin(), dl,
12394 TII->get(ARM::PHI), ABSDstReg)
12395 .addReg(NewRsbDstReg).addMBB(RSBBB)
12396 .addReg(ABSSrcReg).addMBB(BB);
12397
12398 // remove ABS instruction
12399 MI.eraseFromParent();
12400
12401 // return last added BB
12402 return SinkBB;
12403 }
12404 case ARM::COPY_STRUCT_BYVAL_I32:
12405 ++NumLoopByVals;
12406 return EmitStructByval(MI, BB);
12407 case ARM::WIN__CHKSTK:
12408 return EmitLowered__chkstk(MI, BB);
12409 case ARM::WIN__DBZCHK:
12410 return EmitLowered__dbzchk(MI, BB);
12411 }
12412}
12413
12414/// Attaches vregs to MEMCPY that it will use as scratch registers
12415/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12416/// instead of as a custom inserter because we need the use list from the SDNode.
12417static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12418 MachineInstr &MI, const SDNode *Node) {
12419 bool isThumb1 = Subtarget->isThumb1Only();
12420
12421 DebugLoc DL = MI.getDebugLoc();
12422 MachineFunction *MF = MI.getParent()->getParent();
12423 MachineRegisterInfo &MRI = MF->getRegInfo();
12424 MachineInstrBuilder MIB(*MF, MI);
12425
12426 // If the new dst/src is unused mark it as dead.
12427 if (!Node->hasAnyUseOfValue(0)) {
12428 MI.getOperand(0).setIsDead(true);
12429 }
12430 if (!Node->hasAnyUseOfValue(1)) {
12431 MI.getOperand(1).setIsDead(true);
12432 }
12433
12434 // The MEMCPY both defines and kills the scratch registers.
12435 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12436 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12437 : &ARM::GPRRegClass);
12438 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12439 }
12440}
12441
12442 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12443 SDNode *Node) const {
12444 if (MI.getOpcode() == ARM::MEMCPY) {
12445 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12446 return;
12447 }
12448
12449 const MCInstrDesc *MCID = &MI.getDesc();
12450 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12451 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12452 // operand is still set to noreg. If needed, set the optional operand's
12453 // register to CPSR, and remove the redundant implicit def.
12454 //
12455 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12456
12457 // Rename pseudo opcodes.
12458 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12459 unsigned ccOutIdx;
12460 if (NewOpc) {
12461 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12462 MCID = &TII->get(NewOpc);
12463
12464 assert(MCID->getNumOperands() ==
12465 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12466 && "converted opcode should be the same except for cc_out"
12467 " (and, on Thumb1, pred)");
12468
12469 MI.setDesc(*MCID);
12470
12471 // Add the optional cc_out operand
12472 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12473
12474 // On Thumb1, move all input operands to the end, then add the predicate
12475 if (Subtarget->isThumb1Only()) {
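// Thumb1 encodings expect the optional cc_out right after the def and the
// predicate last, so rotate the register inputs behind the cc_out operand
// that was just appended.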
12476 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12477 MI.addOperand(MI.getOperand(1));
12478 MI.removeOperand(1);
12479 }
12480
12481 // Restore the ties
12482 for (unsigned i = MI.getNumOperands(); i--;) {
12483 const MachineOperand& op = MI.getOperand(i);
12484 if (op.isReg() && op.isUse()) {
12485 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12486 if (DefIdx != -1)
12487 MI.tieOperands(DefIdx, i);
12488 }
12489 }
12490
12491 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12492 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12493 ccOutIdx = 1;
12494 } else
12495 ccOutIdx = MCID->getNumOperands() - 1;
12496 } else
12497 ccOutIdx = MCID->getNumOperands() - 1;
12498
12499 // Any ARM instruction that sets the 's' bit should specify an optional
12500 // "cc_out" operand in the last operand position.
12501 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12502 assert(!NewOpc && "Optional cc_out operand required");
12503 return;
12504 }
12505 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12506 // since we already have an optional CPSR def.
12507 bool definesCPSR = false;
12508 bool deadCPSR = false;
12509 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12510 ++i) {
12511 const MachineOperand &MO = MI.getOperand(i);
12512 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12513 definesCPSR = true;
12514 if (MO.isDead())
12515 deadCPSR = true;
12516 MI.removeOperand(i);
12517 break;
12518 }
12519 }
12520 if (!definesCPSR) {
12521 assert(!NewOpc && "Optional cc_out operand required");
12522 return;
12523 }
12524 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12525 if (deadCPSR) {
12526 assert(!MI.getOperand(ccOutIdx).getReg() &&
12527 "expect uninitialized optional cc_out operand");
12528 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12529 if (!Subtarget->isThumb1Only())
12530 return;
12531 }
12532
12533 // If this instruction was defined with an optional CPSR def and its dag node
12534 // had a live implicit CPSR def, then activate the optional CPSR def.
12535 MachineOperand &MO = MI.getOperand(ccOutIdx);
12536 MO.setReg(ARM::CPSR);
12537 MO.setIsDef(true);
12538}
12539
12540//===----------------------------------------------------------------------===//
12541// ARM Optimization Hooks
12542//===----------------------------------------------------------------------===//
12543
12544// Helper function that checks if N is a null or all ones constant.
12545static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12546 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12547 }
12548
12549// Return true if N is conditionally 0 or all ones.
12550// Detects these expressions where cc is an i1 value:
12551//
12552// (select cc 0, y) [AllOnes=0]
12553// (select cc y, 0) [AllOnes=0]
12554// (zext cc) [AllOnes=0]
12555// (sext cc) [AllOnes=0/1]
12556// (select cc -1, y) [AllOnes=1]
12557// (select cc y, -1) [AllOnes=1]
12558//
12559// Invert is set when N is the null/all ones constant when CC is false.
12560// OtherOp is set to the alternative value of N.
12561 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12562 SDValue &CC, bool &Invert,
12563 SDValue &OtherOp,
12564 SelectionDAG &DAG) {
12565 switch (N->getOpcode()) {
12566 default: return false;
12567 case ISD::SELECT: {
12568 CC = N->getOperand(0);
12569 SDValue N1 = N->getOperand(1);
12570 SDValue N2 = N->getOperand(2);
12571 if (isZeroOrAllOnes(N1, AllOnes)) {
12572 Invert = false;
12573 OtherOp = N2;
12574 return true;
12575 }
12576 if (isZeroOrAllOnes(N2, AllOnes)) {
12577 Invert = true;
12578 OtherOp = N1;
12579 return true;
12580 }
12581 return false;
12582 }
12583 case ISD::ZERO_EXTEND:
12584 // (zext cc) can never be the all ones value.
12585 if (AllOnes)
12586 return false;
12587 [[fallthrough]];
12588 case ISD::SIGN_EXTEND: {
12589 SDLoc dl(N);
12590 EVT VT = N->getValueType(0);
12591 CC = N->getOperand(0);
12592 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12593 return false;
12594 Invert = !AllOnes;
12595 if (AllOnes)
12596 // When looking for an AllOnes constant, N is an sext, and the 'other'
12597 // value is 0.
12598 OtherOp = DAG.getConstant(0, dl, VT);
12599 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12600 // When looking for a 0 constant, N can be zext or sext.
12601 OtherOp = DAG.getConstant(1, dl, VT);
12602 else
12603 OtherOp = DAG.getAllOnesConstant(dl, VT);
12604 return true;
12605 }
12606 }
12607}
12608
12609// Combine a constant select operand into its use:
12610//
12611// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12612// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12613// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12614// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12615// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12616//
12617// The transform is rejected if the select doesn't have a constant operand that
12618// is null, or all ones when AllOnes is set.
12619//
12620// Also recognize sext/zext from i1:
12621//
12622// (add (zext cc), x) -> (select cc (add x, 1), x)
12623// (add (sext cc), x) -> (select cc (add x, -1), x)
12624//
12625// These transformations eventually create predicated instructions.
12626//
12627// @param N The node to transform.
12628// @param Slct The N operand that is a select.
12629// @param OtherOp The other N operand (x above).
12630// @param DCI Context.
12631// @param AllOnes Require the select constant to be all ones instead of null.
12632// @returns The new node, or SDValue() on failure.
12633static
12634 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12635 TargetLowering::DAGCombinerInfo &DCI,
12636 bool AllOnes = false) {
12637 SelectionDAG &DAG = DCI.DAG;
12638 EVT VT = N->getValueType(0);
12639 SDValue NonConstantVal;
12640 SDValue CCOp;
12641 bool SwapSelectOps;
12642 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12643 NonConstantVal, DAG))
12644 return SDValue();
12645
12646 // Slct is now known to be the desired identity constant when CC is true.
12647 SDValue TrueVal = OtherOp;
12648 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12649 OtherOp, NonConstantVal);
12650 // Unless SwapSelectOps says CC should be false.
12651 if (SwapSelectOps)
12652 std::swap(TrueVal, FalseVal);
12653
12654 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12655 CCOp, TrueVal, FalseVal);
12656}
12657
12658// Attempt combineSelectAndUse on each operand of a commutative operator N.
12659static
12660 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12661 TargetLowering::DAGCombinerInfo &DCI) {
12662 SDValue N0 = N->getOperand(0);
12663 SDValue N1 = N->getOperand(1);
12664 if (N0.getNode()->hasOneUse())
12665 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12666 return Result;
12667 if (N1.getNode()->hasOneUse())
12668 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12669 return Result;
12670 return SDValue();
12671}
12672
12673 static bool IsVUZPShuffleNode(SDNode *N) {
12674 // VUZP shuffle node.
12675 if (N->getOpcode() == ARMISD::VUZP)
12676 return true;
12677
12678 // "VUZP" on i32 is an alias for VTRN.
12679 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12680 return true;
12681
12682 return false;
12683}
12684
12685 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12686 TargetLowering::DAGCombinerInfo &DCI,
12687 const ARMSubtarget *Subtarget) {
12688 // Look for ADD(VUZP.0, VUZP.1).
12689 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12690 N0 == N1)
12691 return SDValue();
12692
12693 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12694 if (!N->getValueType(0).is64BitVector())
12695 return SDValue();
12696
12697 // Generate vpadd.
12698 SelectionDAG &DAG = DCI.DAG;
12699 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12700 SDLoc dl(N);
12701 SDNode *Unzip = N0.getNode();
12702 EVT VT = N->getValueType(0);
12703
12704 SmallVector<SDValue, 8> Ops;
12705 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12706 TLI.getPointerTy(DAG.getDataLayout())));
12707 Ops.push_back(Unzip->getOperand(0));
12708 Ops.push_back(Unzip->getOperand(1));
12709
12710 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12711}
12712
12713 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12714 TargetLowering::DAGCombinerInfo &DCI,
12715 const ARMSubtarget *Subtarget) {
12716 // Check for two extended operands.
12717 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12718 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12719 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12720 N1.getOpcode() == ISD::ZERO_EXTEND))
12721 return SDValue();
12722
12723 SDValue N00 = N0.getOperand(0);
12724 SDValue N10 = N1.getOperand(0);
12725
12726 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12727 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12728 N00 == N10)
12729 return SDValue();
12730
12731 // We only recognize Q register paddl here; this can't be reached until
12732 // after type legalization.
12733 if (!N00.getValueType().is64BitVector() ||
12734 !N10.getValueType().is64BitVector())
12735 return SDValue();
12736
12737 // Generate vpaddl.
12738 SelectionDAG &DAG = DCI.DAG;
12739 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12740 SDLoc dl(N);
12741 EVT VT = N->getValueType(0);
12742
12743 SmallVector<SDValue, 8> Ops;
12744 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12745 unsigned Opcode;
12746 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12747 Opcode = Intrinsic::arm_neon_vpaddls;
12748 else
12749 Opcode = Intrinsic::arm_neon_vpaddlu;
12750 Ops.push_back(DAG.getConstant(Opcode, dl,
12751 TLI.getPointerTy(DAG.getDataLayout())));
12752 EVT ElemTy = N00.getValueType().getVectorElementType();
12753 unsigned NumElts = VT.getVectorNumElements();
12754 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12755 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12756 N00.getOperand(0), N00.getOperand(1));
12757 Ops.push_back(Concat);
12758
12759 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12760}
12761
12762// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12763// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12764// much easier to match.
12765static SDValue
12766 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12767 TargetLowering::DAGCombinerInfo &DCI,
12768 const ARMSubtarget *Subtarget) {
12769 // Only perform optimization if after legalize, and if NEON is available. We
12770 // also expect both operands to be BUILD_VECTORs.
12771 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12772 || N0.getOpcode() != ISD::BUILD_VECTOR
12773 || N1.getOpcode() != ISD::BUILD_VECTOR)
12774 return SDValue();
12775
12776 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12777 EVT VT = N->getValueType(0);
12778 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12779 return SDValue();
12780
12781 // Check that the vector operands are of the right form.
12782 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12783 // operands, where N is the size of the formed vector.
12784 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12785 // index such that we have a pair wise add pattern.
12786
12787 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12788 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12789 return SDValue();
12790 SDValue Vec = N0->getOperand(0)->getOperand(0);
12791 SDNode *V = Vec.getNode();
12792 unsigned nextIndex = 0;
12793
12794 // For each operands to the ADD which are BUILD_VECTORs,
12795 // check to see if each of their operands are an EXTRACT_VECTOR with
12796 // the same vector and appropriate index.
12797 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12798 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12799 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12800 
12801 SDValue ExtVec0 = N0->getOperand(i);
12802 SDValue ExtVec1 = N1->getOperand(i);
12803
12804 // First operand is the vector; verify it's the same.
12805 if (V != ExtVec0->getOperand(0).getNode() ||
12806 V != ExtVec1->getOperand(0).getNode())
12807 return SDValue();
12808
12809 // Second is the constant; verify it's correct.
12810 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12811 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12812
12813 // For the constant, we want to see all the even or all the odd.
12814 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12815 || C1->getZExtValue() != nextIndex+1)
12816 return SDValue();
12817
12818 // Increment index.
12819 nextIndex+=2;
12820 } else
12821 return SDValue();
12822 }
12823
12824 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12825 // we're using the entire input vector, otherwise there's a size/legality
12826 // mismatch somewhere.
12827 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12828 VT.getVectorElementType() == MVT::i16)
12829 return SDValue();
12830
12831 // Create VPADDL node.
12832 SelectionDAG &DAG = DCI.DAG;
12833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12834
12835 SDLoc dl(N);
12836
12837 // Build operand list.
12838 SmallVector<SDValue, 8> Ops;
12839 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12840 TLI.getPointerTy(DAG.getDataLayout())));
12841
12842 // Input is the vector.
12843 Ops.push_back(Vec);
12844
12845 // Get widened type and narrowed type.
12846 MVT widenType;
12847 unsigned numElem = VT.getVectorNumElements();
12848
12849 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12850 switch (inputLaneType.getSimpleVT().SimpleTy) {
12851 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12852 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12853 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12854 default:
12855 llvm_unreachable("Invalid vector element type for padd optimization.");
12856 }
12857
12858 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12859 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12860 return DAG.getNode(ExtOp, dl, VT, tmp);
12861}
12862
12863 static SDValue findMUL_LOHI(SDValue V) {
12864 if (V->getOpcode() == ISD::UMUL_LOHI ||
12865 V->getOpcode() == ISD::SMUL_LOHI)
12866 return V;
12867 return SDValue();
12868}
12869
12870static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12871 TargetLowering::DAGCombinerInfo &DCI,
12872 const ARMSubtarget *Subtarget) {
12873 if (!Subtarget->hasBaseDSP())
12874 return SDValue();
12875
12876 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12877 // accumulates the product into a 64-bit value. The 16-bit values will
12878 // be sign extended somehow or SRA'd into 32-bit values
12879 // (addc (adde (mul 16bit, 16bit), lo), hi)
12880 SDValue Mul = AddcNode->getOperand(0);
12881 SDValue Lo = AddcNode->getOperand(1);
12882 if (Mul.getOpcode() != ISD::MUL) {
12883 Lo = AddcNode->getOperand(0);
12884 Mul = AddcNode->getOperand(1);
12885 if (Mul.getOpcode() != ISD::MUL)
12886 return SDValue();
12887 }
12888
12889 SDValue SRA = AddeNode->getOperand(0);
12890 SDValue Hi = AddeNode->getOperand(1);
12891 if (SRA.getOpcode() != ISD::SRA) {
12892 SRA = AddeNode->getOperand(1);
12893 Hi = AddeNode->getOperand(0);
12894 if (SRA.getOpcode() != ISD::SRA)
12895 return SDValue();
12896 }
12897 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12898 if (Const->getZExtValue() != 31)
12899 return SDValue();
12900 } else
12901 return SDValue();
12902
12903 if (SRA.getOperand(0) != Mul)
12904 return SDValue();
12905
12906 SelectionDAG &DAG = DCI.DAG;
12907 SDLoc dl(AddcNode);
12908 unsigned Opcode = 0;
12909 SDValue Op0;
12910 SDValue Op1;
12911
12912 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12913 Opcode = ARMISD::SMLALBB;
12914 Op0 = Mul.getOperand(0);
12915 Op1 = Mul.getOperand(1);
12916 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12917 Opcode = ARMISD::SMLALBT;
12918 Op0 = Mul.getOperand(0);
12919 Op1 = Mul.getOperand(1).getOperand(0);
12920 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12921 Opcode = ARMISD::SMLALTB;
12922 Op0 = Mul.getOperand(0).getOperand(0);
12923 Op1 = Mul.getOperand(1);
12924 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12925 Opcode = ARMISD::SMLALTT;
12926 Op0 = Mul->getOperand(0).getOperand(0);
12927 Op1 = Mul->getOperand(1).getOperand(0);
12928 }
12929
12930 if (!Op0 || !Op1)
12931 return SDValue();
12932
12933 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12934 Op0, Op1, Lo, Hi);
12935 // Replace the ADDs' nodes uses by the MLA node's values.
12936 SDValue HiMLALResult(SMLAL.getNode(), 1);
12937 SDValue LoMLALResult(SMLAL.getNode(), 0);
12938
12939 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12940 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12941
12942 // Return original node to notify the driver to stop replacing.
12943 SDValue resNode(AddcNode, 0);
12944 return resNode;
12945}
12946
12947 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12948 TargetLowering::DAGCombinerInfo &DCI,
12949 const ARMSubtarget *Subtarget) {
12950 // Look for multiply add opportunities.
12951 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12952 // each add node consumes a value from ISD::UMUL_LOHI and there is
12953 // a glue link from the first add to the second add.
12954 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12955 // an S/UMLAL instruction.
12956 // UMUL_LOHI
12957 // / :lo \ :hi
12958 // V \ [no multiline comment]
12959 // loAdd -> ADDC |
12960 // \ :carry /
12961 // V V
12962 // ADDE <- hiAdd
12963 //
12964 // In the special case where only the higher part of a signed result is used
12965 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12966 // a constant with the exact value of 0x80000000, we recognize we are dealing
12967 // with a "rounded multiply and add" (or subtract) and transform it into
12968 // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
12969
12970 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12971 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12972 "Expect an ADDE or SUBE");
12973
12974 assert(AddeSubeNode->getNumOperands() == 3 &&
12975 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12976 "ADDE node has the wrong inputs");
12977
12978 // Check that we are chained to the right ADDC or SUBC node.
12979 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12980 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12981 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12982 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12983 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12984 return SDValue();
12985
12986 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12987 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12988
12989 // Check if the two operands are from the same mul_lohi node.
12990 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12991 return SDValue();
12992
12993 assert(AddcSubcNode->getNumValues() == 2 &&
12994 AddcSubcNode->getValueType(0) == MVT::i32 &&
12995 "Expect ADDC with two result values. First: i32");
12996
12997 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12998 // may be an SMLAL which multiplies two 16-bit values.
12999 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
13000 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
13001 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
13002 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
13003 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
13004 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
13005
13006 // Check for the triangle shape.
13007 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
13008 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
13009
13010 // Make sure that the ADDE/SUBE operands are not coming from the same node.
13011 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
13012 return SDValue();
13013
13014 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
13015 bool IsLeftOperandMUL = false;
13016 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
13017 if (MULOp == SDValue())
13018 MULOp = findMUL_LOHI(AddeSubeOp1);
13019 else
13020 IsLeftOperandMUL = true;
13021 if (MULOp == SDValue())
13022 return SDValue();
13023
13024 // Figure out the right opcode.
13025 unsigned Opc = MULOp->getOpcode();
13026 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
13027
13028 // Figure out the high and low input values to the MLAL node.
13029 SDValue *HiAddSub = nullptr;
13030 SDValue *LoMul = nullptr;
13031 SDValue *LowAddSub = nullptr;
13032
13033 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
13034 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
13035 return SDValue();
13036
13037 if (IsLeftOperandMUL)
13038 HiAddSub = &AddeSubeOp1;
13039 else
13040 HiAddSub = &AddeSubeOp0;
13041
13042 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13043 // whose low result is fed to the ADDC/SUBC we are checking.
13044
13045 if (AddcSubcOp0 == MULOp.getValue(0)) {
13046 LoMul = &AddcSubcOp0;
13047 LowAddSub = &AddcSubcOp1;
13048 }
13049 if (AddcSubcOp1 == MULOp.getValue(0)) {
13050 LoMul = &AddcSubcOp1;
13051 LowAddSub = &AddcSubcOp0;
13052 }
13053
13054 if (!LoMul)
13055 return SDValue();
13056
13057 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13058 // the replacement below will create a cycle.
13059 if (AddcSubcNode == HiAddSub->getNode() ||
13060 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13061 return SDValue();
13062
13063 // Create the merged node.
13064 SelectionDAG &DAG = DCI.DAG;
13065
13066 // Start building operand list.
13067 SmallVector<SDValue, 8> Ops;
13068 Ops.push_back(LoMul->getOperand(0));
13069 Ops.push_back(LoMul->getOperand(1));
13070
13071 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13072 // the case, we must be doing a signed multiplication and only use the higher
13073 // part of the result of the MLAL; furthermore, the LowAddSub must be a
13074 // constant addition or subtraction with the value 0x80000000.
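// Adding 0x80000000 to the low half of the 64-bit product and then keeping
// only the high half rounds the top 32 bits to nearest, which is exactly the
// rounding that SMMLAR/SMMLSR perform.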
13075 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13076 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13077 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13078 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13079 0x80000000) {
13080 Ops.push_back(*HiAddSub);
13081 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13082 FinalOpc = ARMISD::SMMLSR;
13083 } else {
13084 FinalOpc = ARMISD::SMMLAR;
13085 }
13086 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13087 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13088
13089 return SDValue(AddeSubeNode, 0);
13090 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13091 // SMMLS is generated during instruction selection and the rest of this
13092 // function can not handle the case where AddcSubcNode is a SUBC.
13093 return SDValue();
13094
13095 // Finish building the operand list for {U/S}MLAL
13096 Ops.push_back(*LowAddSub);
13097 Ops.push_back(*HiAddSub);
13098
13099 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13100 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13101
13102 // Replace the ADDC/ADDE nodes' uses with the MLAL node's values.
13103 SDValue HiMLALResult(MLALNode.getNode(), 1);
13104 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13105
13106 SDValue LoMLALResult(MLALNode.getNode(), 0);
13107 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13108
13109 // Return original node to notify the driver to stop replacing.
13110 return SDValue(AddeSubeNode, 0);
13111}
13112
13113static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13114 TargetLowering::DAGCombinerInfo &DCI,
13115 const ARMSubtarget *Subtarget) {
13116 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13117 // While trying to combine for the other MLAL nodes, first search for the
13118 // chance to use UMAAL. Check if Addc uses a node which has already
13119 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13120 // as the addend, and it's handled in PerformUMLALCombine.
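// In other words: the ADDC adds one extra 32-bit addend to the low half of a
// UMLAL whose high accumulator input is zero, and the ADDE merely propagates
// the carry into the high half, so the total is a*b + lo + addend, i.e. UMAAL.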
13121
13122 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13123 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13124
13125 // Check that we have a glued ADDC node.
13126 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13127 if (AddcNode->getOpcode() != ARMISD::ADDC)
13128 return SDValue();
13129
13130 // Find the converted UMAAL or quit if it doesn't exist.
13131 SDNode *UmlalNode = nullptr;
13132 SDValue AddHi;
13133 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13134 UmlalNode = AddcNode->getOperand(0).getNode();
13135 AddHi = AddcNode->getOperand(1);
13136 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13137 UmlalNode = AddcNode->getOperand(1).getNode();
13138 AddHi = AddcNode->getOperand(0);
13139 } else {
13140 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13141 }
13142
13143 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13144 // the ADDC as well as Zero.
13145 if (!isNullConstant(UmlalNode->getOperand(3)))
13146 return SDValue();
13147
13148 if ((isNullConstant(AddeNode->getOperand(0)) &&
13149 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13150 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13151 isNullConstant(AddeNode->getOperand(1)))) {
13152 SelectionDAG &DAG = DCI.DAG;
13153 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13154 UmlalNode->getOperand(2), AddHi };
13155 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13156 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13157
13158 // Replace the ADDC/ADDE nodes' uses with the UMAAL node's values.
13159 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13160 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13161
13162 // Return original node to notify the driver to stop replacing.
13163 return SDValue(AddeNode, 0);
13164 }
13165 return SDValue();
13166}
13167
13168static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13169 const ARMSubtarget *Subtarget) {
13170 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13171 return SDValue();
13172
13173 // Check that we have a pair of ADDC and ADDE as operands.
13174 // Both addends of the ADDE must be zero.
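// With both ADDE addends zero, the ADDC/ADDE pair is just the 64-bit widening
// add of the ADDC's two operands, so the UMLAL computes a*b + x + y and can be
// replaced by UMAAL(a, b, x, y).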
13175 SDNode* AddcNode = N->getOperand(2).getNode();
13176 SDNode* AddeNode = N->getOperand(3).getNode();
13177 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13178 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13179 isNullConstant(AddeNode->getOperand(0)) &&
13180 isNullConstant(AddeNode->getOperand(1)) &&
13181 (AddeNode->getOperand(2).getNode() == AddcNode))
13182 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13183 DAG.getVTList(MVT::i32, MVT::i32),
13184 {N->getOperand(0), N->getOperand(1),
13185 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13186 else
13187 return SDValue();
13188}
13189
13190static SDValue PerformAddcSubcCombine(SDNode *N,
13191 TargetLowering::DAGCombinerInfo &DCI,
13192 const ARMSubtarget *Subtarget) {
13193 SelectionDAG &DAG(DCI.DAG);
13194
13195 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13196 // (SUBC (ADDE 0, 0, C), 1) -> C
13197 SDValue LHS = N->getOperand(0);
13198 SDValue RHS = N->getOperand(1);
13199 if (LHS->getOpcode() == ARMISD::ADDE &&
13200 isNullConstant(LHS->getOperand(0)) &&
13201 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13202 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13203 }
13204 }
13205
13206 if (Subtarget->isThumb1Only()) {
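// Fold a negative RHS constant into the opposite carry-setting operation,
// e.g. (ADDC x, -C) -> (SUBC x, C); the positive constant is generally
// cheaper for Thumb1 to materialize.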
13207 SDValue RHS = N->getOperand(1);
13208 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13209 int32_t imm = C->getSExtValue();
13210 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13211 SDLoc DL(N);
13212 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13213 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13214 : ARMISD::ADDC;
13215 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13216 }
13217 }
13218 }
13219
13220 return SDValue();
13221}
13222
13223static SDValue PerformAddeSubeCombine(SDNode *N,
13224 TargetLowering::DAGCombinerInfo &DCI,
13225 const ARMSubtarget *Subtarget) {
13226 if (Subtarget->isThumb1Only()) {
13227 SelectionDAG &DAG = DCI.DAG;
13228 SDValue RHS = N->getOperand(1);
13229 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13230 int64_t imm = C->getSExtValue();
13231 if (imm < 0) {
13232 SDLoc DL(N);
13233
13234 // The with-carry-in form matches bitwise not instead of the negation.
13235 // Effectively, the inverse interpretation of the carry flag already
13236 // accounts for part of the negation.
13237 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13238
13239 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13240 : ARMISD::ADDE;
13241 return DAG.getNode(Opcode, DL, N->getVTList(),
13242 N->getOperand(0), RHS, N->getOperand(2));
13243 }
13244 }
13245 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13246 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13247 }
13248 return SDValue();
13249}
13250
13251static SDValue PerformSELECTCombine(SDNode *N,
13252 TargetLowering::DAGCombinerInfo &DCI,
13253 const ARMSubtarget *Subtarget) {
13254 if (!Subtarget->hasMVEIntegerOps())
13255 return SDValue();
13256
13257 SDLoc dl(N);
13258 SDValue SetCC;
13259 SDValue LHS;
13260 SDValue RHS;
13261 ISD::CondCode CC;
13262 SDValue TrueVal;
13263 SDValue FalseVal;
13264
13265 if (N->getOpcode() == ISD::SELECT &&
13266 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13267 SetCC = N->getOperand(0);
13268 LHS = SetCC->getOperand(0);
13269 RHS = SetCC->getOperand(1);
13270 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13271 TrueVal = N->getOperand(1);
13272 FalseVal = N->getOperand(2);
13273 } else if (N->getOpcode() == ISD::SELECT_CC) {
13274 LHS = N->getOperand(0);
13275 RHS = N->getOperand(1);
13276 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13277 TrueVal = N->getOperand(2);
13278 FalseVal = N->getOperand(3);
13279 } else {
13280 return SDValue();
13281 }
13282
13283 unsigned int Opcode = 0;
13284 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13285 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13286 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13287 Opcode = ARMISD::VMINVu;
13288 if (CC == ISD::SETUGT)
13289 std::swap(TrueVal, FalseVal);
13290 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13291 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13292 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13293 Opcode = ARMISD::VMINVs;
13294 if (CC == ISD::SETGT)
13295 std::swap(TrueVal, FalseVal);
13296 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13297 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13298 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13299 Opcode = ARMISD::VMAXVu;
13300 if (CC == ISD::SETULT)
13301 std::swap(TrueVal, FalseVal);
13302 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13303 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13304 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13305 Opcode = ARMISD::VMAXVs;
13306 if (CC == ISD::SETLT)
13307 std::swap(TrueVal, FalseVal);
13308 } else
13309 return SDValue();
13310
13311 // Normalise to the right hand side being the vector reduction
13312 switch (TrueVal->getOpcode()) {
13313 case ISD::VECREDUCE_UMIN:
13314 case ISD::VECREDUCE_SMIN:
13315 case ISD::VECREDUCE_UMAX:
13316 case ISD::VECREDUCE_SMAX:
13317 std::swap(LHS, RHS);
13318 std::swap(TrueVal, FalseVal);
13319 break;
13320 }
13321
13322 EVT VectorType = FalseVal->getOperand(0).getValueType();
13323
13324 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13325 VectorType != MVT::v4i32)
13326 return SDValue();
13327
13328 EVT VectorScalarType = VectorType.getVectorElementType();
13329
13330 // The values being selected must also be the ones being compared
13331 if (TrueVal != LHS || FalseVal != RHS)
13332 return SDValue();
13333
13334 EVT LeftType = LHS->getValueType(0);
13335 EVT RightType = RHS->getValueType(0);
13336
13337 // The types must match the reduced type too
13338 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13339 return SDValue();
13340
13341 // Legalise the scalar to an i32
13342 if (VectorScalarType != MVT::i32)
13343 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13344
13345 // Generate the reduction as an i32 for legalisation purposes
13346 auto Reduction =
13347 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13348
13349 // The result isn't actually an i32 so truncate it back to its original type
13350 if (VectorScalarType != MVT::i32)
13351 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13352
13353 return Reduction;
13354}
13355
13356// A special combine for the vqdmulh family of instructions. This is one of the
13357// potential set of patterns that could match this instruction. The base pattern
13358// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13359// This matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13360// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13361// the max is unnecessary.
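// For example, with a and b of type v8i16:
//   smin(sra(mul(sext(a), sext(b)), 15), splat(32767))
// becomes sign_extend(VQDMULH(a, b)).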
13362static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13363 EVT VT = N->getValueType(0);
13364 SDValue Shft;
13365 ConstantSDNode *Clamp;
13366
13367 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13368 return SDValue();
13369
13370 if (N->getOpcode() == ISD::SMIN) {
13371 Shft = N->getOperand(0);
13372 Clamp = isConstOrConstSplat(N->getOperand(1));
13373 } else if (N->getOpcode() == ISD::VSELECT) {
13374 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13375 SDValue Cmp = N->getOperand(0);
13376 if (Cmp.getOpcode() != ISD::SETCC ||
13377 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13378 Cmp.getOperand(0) != N->getOperand(1) ||
13379 Cmp.getOperand(1) != N->getOperand(2))
13380 return SDValue();
13381 Shft = N->getOperand(1);
13382 Clamp = isConstOrConstSplat(N->getOperand(2));
13383 } else
13384 return SDValue();
13385
13386 if (!Clamp)
13387 return SDValue();
13388
13389 MVT ScalarType;
13390 int ShftAmt = 0;
13391 switch (Clamp->getSExtValue()) {
13392 case (1 << 7) - 1:
13393 ScalarType = MVT::i8;
13394 ShftAmt = 7;
13395 break;
13396 case (1 << 15) - 1:
13397 ScalarType = MVT::i16;
13398 ShftAmt = 15;
13399 break;
13400 case (1ULL << 31) - 1:
13401 ScalarType = MVT::i32;
13402 ShftAmt = 31;
13403 break;
13404 default:
13405 return SDValue();
13406 }
13407
13408 if (Shft.getOpcode() != ISD::SRA)
13409 return SDValue();
13410 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13411 if (!N1 || N1->getSExtValue() != ShftAmt)
13412 return SDValue();
13413
13414 SDValue Mul = Shft.getOperand(0);
13415 if (Mul.getOpcode() != ISD::MUL)
13416 return SDValue();
13417
13418 SDValue Ext0 = Mul.getOperand(0);
13419 SDValue Ext1 = Mul.getOperand(1);
13420 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13421 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13422 return SDValue();
13423 EVT VecVT = Ext0.getOperand(0).getValueType();
13424 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13425 return SDValue();
13426 if (Ext1.getOperand(0).getValueType() != VecVT ||
13427 VecVT.getScalarType() != ScalarType ||
13428 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13429 return SDValue();
13430
13431 SDLoc DL(Mul);
13432 unsigned LegalLanes = 128 / (ShftAmt + 1);
13433 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13434 // For types smaller than legal vectors, extend to be legal and only use the
13435 // needed lanes.
13436 if (VecVT.getSizeInBits() < 128) {
13437 EVT ExtVecVT =
13438 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13439 VecVT.getVectorNumElements());
13440 SDValue Inp0 =
13441 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13442 SDValue Inp1 =
13443 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13444 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13445 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13446 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13447 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13448 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13449 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13450 }
13451
13452 // For larger types, split into legal sized chunks.
13453 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13454 unsigned NumParts = VecVT.getSizeInBits() / 128;
13455 SmallVector<SDValue> Parts;
13456 for (unsigned I = 0; I < NumParts; ++I) {
13457 SDValue Inp0 =
13458 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13459 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13460 SDValue Inp1 =
13461 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13462 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13463 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13464 Parts.push_back(VQDMULH);
13465 }
13466 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13467 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13468}
13469
13470static SDValue PerformVSELECTCombine(SDNode *N,
13471 TargetLowering::DAGCombinerInfo &DCI,
13472 const ARMSubtarget *Subtarget) {
13473 if (!Subtarget->hasMVEIntegerOps())
13474 return SDValue();
13475
13476 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13477 return V;
13478
13479 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13480 //
13481 // We need to re-implement this optimization here as the implementation in the
13482 // Target-Independent DAGCombiner does not handle the kind of constant we make
13483 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13484 // good reason, allowing truncation there would break other targets).
13485 //
13486 // Currently, this is only done for MVE, as it's the only target that benefits
13487 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13488 if (N->getOperand(0).getOpcode() != ISD::XOR)
13489 return SDValue();
13490 SDValue XOR = N->getOperand(0);
13491
13492 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13493 // It is important to check with truncation allowed as the BUILD_VECTORs we
13494 // generate in those situations will truncate their operands.
13495 ConstantSDNode *Const =
13496 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13497 /*AllowTruncation*/ true);
13498 if (!Const || !Const->isOne())
13499 return SDValue();
13500
13501 // Rewrite into vselect(cond, rhs, lhs).
13502 SDValue Cond = XOR->getOperand(0);
13503 SDValue LHS = N->getOperand(1);
13504 SDValue RHS = N->getOperand(2);
13505 EVT Type = N->getValueType(0);
13506 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13507}
13508
13509// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
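// Lane i of such a setcc is (i < n), which is exactly the "first n lanes
// active" predicate that the MVE VCTP intrinsic produces.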
13510static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13511 TargetLowering::DAGCombinerInfo &DCI,
13512 const ARMSubtarget *Subtarget) {
13513 SDValue Op0 = N->getOperand(0);
13514 SDValue Op1 = N->getOperand(1);
13515 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13516 EVT VT = N->getValueType(0);
13517
13518 if (!Subtarget->hasMVEIntegerOps() ||
13519 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13520 return SDValue();
13521
13522 if (CC == ISD::SETUGE) {
13523 std::swap(Op0, Op1);
13524 CC = ISD::SETULT;
13525 }
13526
13527 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13528 Op0.getOpcode() != ISD::BUILD_VECTOR)
13529 return SDValue();
13530
13531 // Check first operand is BuildVector of 0,1,2,...
13532 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13533 if (!Op0.getOperand(I).isUndef() &&
13534 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13535 Op0.getConstantOperandVal(I) == I))
13536 return SDValue();
13537 }
13538
13539 // The second is a Splat of Op1S
13540 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13541 if (!Op1S)
13542 return SDValue();
13543
13544 unsigned Opc;
13545 switch (VT.getVectorNumElements()) {
13546 case 2:
13547 Opc = Intrinsic::arm_mve_vctp64;
13548 break;
13549 case 4:
13550 Opc = Intrinsic::arm_mve_vctp32;
13551 break;
13552 case 8:
13553 Opc = Intrinsic::arm_mve_vctp16;
13554 break;
13555 case 16:
13556 Opc = Intrinsic::arm_mve_vctp8;
13557 break;
13558 default:
13559 return SDValue();
13560 }
13561
13562 SDLoc DL(N);
13563 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13564 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13565 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13566}
13567
13568/// PerformADDECombine - Target-specific dag combine transform from
13569/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13570/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13571static SDValue PerformADDECombine(SDNode *N,
13572 TargetLowering::DAGCombinerInfo &DCI,
13573 const ARMSubtarget *Subtarget) {
13574 // Only ARM and Thumb2 support UMLAL/SMLAL.
13575 if (Subtarget->isThumb1Only())
13576 return PerformAddeSubeCombine(N, DCI, Subtarget);
13577
13578 // Only perform the checks after legalize when the pattern is available.
13579 if (DCI.isBeforeLegalize()) return SDValue();
13580
13581 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13582}
13583
13584/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13585/// operands N0 and N1. This is a helper for PerformADDCombine that is
13586/// called with the default operands, and if that fails, with commuted
13587/// operands.
13588static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0,
13589 SDValue N1, TargetLowering::DAGCombinerInfo &DCI,
13590 const ARMSubtarget *Subtarget){
13591 // Attempt to create vpadd for this add.
13592 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13593 return Result;
13594
13595 // Attempt to create vpaddl for this add.
13596 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13597 return Result;
13598 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13599 Subtarget))
13600 return Result;
13601
13602 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13603 if (N0.getNode()->hasOneUse())
13604 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13605 return Result;
13606 return SDValue();
13607}
13608
13609static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13610 EVT VT = N->getValueType(0);
13611 SDValue N0 = N->getOperand(0);
13612 SDValue N1 = N->getOperand(1);
13613 SDLoc dl(N);
13614
13615 auto IsVecReduce = [](SDValue Op) {
13616 switch (Op.getOpcode()) {
13617 case ISD::VECREDUCE_ADD:
13618 case ARMISD::VADDVs:
13619 case ARMISD::VADDVu:
13620 case ARMISD::VMLAVs:
13621 case ARMISD::VMLAVu:
13622 return true;
13623 }
13624 return false;
13625 };
13626
13627 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13628 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13629 // add(add(X, vecreduce(Y)), vecreduce(Z))
13630 // to make better use of vaddva style instructions.
13631 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13632 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13633 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13634 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13635 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13636 }
13637 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13638 // add(add(add(A, C), reduce(B)), reduce(D))
13639 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13640 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13641 unsigned N0RedOp = 0;
13642 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13643 N0RedOp = 1;
13644 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13645 return SDValue();
13646 }
13647
13648 unsigned N1RedOp = 0;
13649 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13650 N1RedOp = 1;
13651 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13652 return SDValue();
13653
13654 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13655 N1.getOperand(1 - N1RedOp));
13656 SDValue Add1 =
13657 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13658 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13659 }
13660 return SDValue();
13661 };
13662 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13663 return R;
13664 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13665 return R;
13666
13667 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13668 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13669 // by ascending load offsets. This can help cores prefetch if the order of
13670 // loads is more predictable.
13671 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13672 // Check if two reductions are known to load data where one is before/after
13673 // another. Return negative if N0 loads data before N1, positive if N1 is
13674 // before N0 and 0 otherwise if nothing is known.
13675 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13676 // Look through to the first operand of a MUL, for the VMLA case.
13677 // Currently only looks at the first operand, in the hope they are equal.
13678 if (N0.getOpcode() == ISD::MUL)
13679 N0 = N0.getOperand(0);
13680 if (N1.getOpcode() == ISD::MUL)
13681 N1 = N1.getOperand(0);
13682
13683 // Return true if the two operands are loads to the same object and the
13684 // offset of the first is known to be less than the offset of the second.
13685 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13686 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13687 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13688 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13689 Load1->isIndexed())
13690 return 0;
13691
13692 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13693 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13694
13695 if (!BaseLocDecomp0.getBase() ||
13696 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13697 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13698 return 0;
13699 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13700 return -1;
13701 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13702 return 1;
13703 return 0;
13704 };
13705
13706 SDValue X;
13707 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13708 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13709 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13710 N0.getOperand(1).getOperand(0));
13711 if (IsBefore < 0) {
13712 X = N0.getOperand(0);
13713 N0 = N0.getOperand(1);
13714 } else if (IsBefore > 0) {
13715 X = N0.getOperand(1);
13716 N0 = N0.getOperand(0);
13717 } else
13718 return SDValue();
13719 } else if (IsVecReduce(N0.getOperand(0))) {
13720 X = N0.getOperand(1);
13721 N0 = N0.getOperand(0);
13722 } else if (IsVecReduce(N0.getOperand(1))) {
13723 X = N0.getOperand(0);
13724 N0 = N0.getOperand(1);
13725 } else
13726 return SDValue();
13727 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13728 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13729 // Note this is backward to how you would expect. We create
13730 // add(reduce(load + 16), reduce(load + 0)) so that the
13731 // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
13732 // the X as VADDV(load + 0)
13733 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13734 } else
13735 return SDValue();
13736
13737 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13738 return SDValue();
13739
13740 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13741 return SDValue();
13742
13743 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13744 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13745 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13746 };
13747 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13748 return R;
13749 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13750 return R;
13751 return SDValue();
13752}
13753
13754static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13755 const ARMSubtarget *Subtarget) {
13756 if (!Subtarget->hasMVEIntegerOps())
13757 return SDValue();
13758
13759 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13760 return R;
13761
13762 EVT VT = N->getValueType(0);
13763 SDValue N0 = N->getOperand(0);
13764 SDValue N1 = N->getOperand(1);
13765 SDLoc dl(N);
13766
13767 if (VT != MVT::i64)
13768 return SDValue();
13769
13770 // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13771 // will look like:
13772 // t1: i32,i32 = ARMISD::VADDLVs x
13773 // t2: i64 = build_pair t1, t1:1
13774 // t3: i64 = add t2, y
13775 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13776 // the add to be simplified separately.
13777 // We also need to check for sext / zext and commutative adds.
13778 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13779 SDValue NB) {
13780 if (NB->getOpcode() != ISD::BUILD_PAIR)
13781 return SDValue();
13782 SDValue VecRed = NB->getOperand(0);
13783 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13784 VecRed.getResNo() != 0 ||
13785 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13786 return SDValue();
13787
13788 if (VecRed->getOpcode() == OpcodeA) {
13789 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13790 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13791 VecRed.getOperand(0), VecRed.getOperand(1));
13792 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13793 }
13794
13795 SmallVector<SDValue, 4> Ops(2);
13796 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13797
13798 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13799 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13800 Ops.push_back(VecRed->getOperand(I));
13801 SDValue Red =
13802 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13803 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13804 SDValue(Red.getNode(), 1));
13805 };
13806
13807 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13808 return M;
13809 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13810 return M;
13811 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13812 return M;
13813 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13814 return M;
13815 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13816 return M;
13817 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13818 return M;
13819 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13820 return M;
13821 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13822 return M;
13823 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13824 return M;
13825 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13826 return M;
13827 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13828 return M;
13829 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13830 return M;
13831 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13832 return M;
13833 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13834 return M;
13835 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13836 return M;
13837 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13838 return M;
13839 return SDValue();
13840}
13841
13842bool
13843ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13844 CombineLevel Level) const {
13845 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13846 N->getOpcode() == ISD::SRL) &&
13847 "Expected shift op");
13848
13849 SDValue ShiftLHS = N->getOperand(0);
13850 if (!ShiftLHS->hasOneUse())
13851 return false;
13852
13853 if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
13854 !ShiftLHS.getOperand(0)->hasOneUse())
13855 return false;
13856
13857 if (Level == BeforeLegalizeTypes)
13858 return true;
13859
13860 if (N->getOpcode() != ISD::SHL)
13861 return true;
13862
13863 if (Subtarget->isThumb1Only()) {
13864 // Avoid making expensive immediates by commuting shifts. (This logic
13865 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13866 // for free.)
13867 if (N->getOpcode() != ISD::SHL)
13868 return true;
13869 SDValue N1 = N->getOperand(0);
13870 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13871 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13872 return true;
13873 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13874 if (Const->getAPIntValue().ult(256))
13875 return false;
13876 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13877 Const->getAPIntValue().sgt(-256))
13878 return false;
13879 }
13880 return true;
13881 }
13882
13883 // Turn off commute-with-shift transform after legalization, so it doesn't
13884 // conflict with PerformSHLSimplify. (We could try to detect when
13885 // PerformSHLSimplify would trigger more precisely, but it isn't
13886 // really necessary.)
13887 return false;
13888}
13889
13890bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13891 const SDNode *N) const {
13892 assert(N->getOpcode() == ISD::XOR &&
13893 (N->getOperand(0).getOpcode() == ISD::SHL ||
13894 N->getOperand(0).getOpcode() == ISD::SRL) &&
13895 "Expected XOR(SHIFT) pattern");
13896
13897 // Only commute if the entire NOT mask is a hidden shifted mask.
13898 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13899 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13900 if (XorC && ShiftC) {
13901 unsigned MaskIdx, MaskLen;
13902 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13903 unsigned ShiftAmt = ShiftC->getZExtValue();
13904 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13905 if (N->getOperand(0).getOpcode() == ISD::SHL)
13906 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13907 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13908 }
13909 }
13910
13911 return false;
13912}
13913
13914bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13915 const SDNode *N, CombineLevel Level) const {
13916 assert(((N->getOpcode() == ISD::SHL &&
13917 N->getOperand(0).getOpcode() == ISD::SRL) ||
13918 (N->getOpcode() == ISD::SRL &&
13919 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13920 "Expected shift-shift mask");
13921
13922 if (!Subtarget->isThumb1Only())
13923 return true;
13924
13925 if (Level == BeforeLegalizeTypes)
13926 return true;
13927
13928 return false;
13929}
13930
13932 EVT VT) const {
13933 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13934}
13935
13936bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13937 if (!Subtarget->hasNEON()) {
13938 if (Subtarget->isThumb1Only())
13939 return VT.getScalarSizeInBits() <= 32;
13940 return true;
13941 }
13942 return VT.isScalarInteger();
13943}
13944
13945bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13946 EVT VT) const {
13947 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13948 return false;
13949
13950 switch (FPVT.getSimpleVT().SimpleTy) {
13951 case MVT::f16:
13952 return Subtarget->hasVFP2Base();
13953 case MVT::f32:
13954 return Subtarget->hasVFP2Base();
13955 case MVT::f64:
13956 return Subtarget->hasFP64();
13957 case MVT::v4f32:
13958 case MVT::v8f16:
13959 return Subtarget->hasMVEFloatOps();
13960 default:
13961 return false;
13962 }
13963}
13964
13965static SDValue PerformSHLSimplify(SDNode *N,
13966 TargetLowering::DAGCombinerInfo &DCI,
13967 const ARMSubtarget *ST) {
13968 // Allow the generic combiner to identify potential bswaps.
13969 if (DCI.isBeforeLegalize())
13970 return SDValue();
13971
13972 // DAG combiner will fold:
13973 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13974 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2
13975 // Other code patterns that can also be modified have the following form:
13976 // b + ((a << 1) | 510)
13977 // b + ((a << 1) & 510)
13978 // b + ((a << 1) ^ 510)
13979 // b + ((a << 1) + 510)
13980
13981 // Many instructions can perform the shift for free, but it requires both
13982 // operands to be registers. If c1 << c2 is too large, a mov immediate
13983 // instruction will be needed. So, unfold back to the original pattern if:
13984 // - c1 and c2 are small enough that they don't require mov imms, and
13985 // - the user(s) of the node can perform an shl.
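// For example, (add (shl x, 1), 510) is unfolded back to (shl (add x, 255), 1)
// when the checks below succeed.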
13986
13987 // No shifted operands for 16-bit instructions.
13988 if (ST->isThumb() && ST->isThumb1Only())
13989 return SDValue();
13990
13991 // Check that all the users could perform the shl themselves.
13992 for (auto *U : N->users()) {
13993 switch(U->getOpcode()) {
13994 default:
13995 return SDValue();
13996 case ISD::SUB:
13997 case ISD::ADD:
13998 case ISD::AND:
13999 case ISD::OR:
14000 case ISD::XOR:
14001 case ISD::SETCC:
14002 case ARMISD::CMP:
14003 // Check that the user isn't already using a constant because there
14004 // aren't any instructions that support an immediate operand and a
14005 // shifted operand.
14006 if (isa<ConstantSDNode>(U->getOperand(0)) ||
14007 isa<ConstantSDNode>(U->getOperand(1)))
14008 return SDValue();
14009
14010 // Check that it's not already using a shift.
14011 if (U->getOperand(0).getOpcode() == ISD::SHL ||
14012 U->getOperand(1).getOpcode() == ISD::SHL)
14013 return SDValue();
14014 break;
14015 }
14016 }
14017
14018 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
14019 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
14020 return SDValue();
14021
14022 if (N->getOperand(0).getOpcode() != ISD::SHL)
14023 return SDValue();
14024
14025 SDValue SHL = N->getOperand(0);
14026
14027 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14028 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
14029 if (!C1ShlC2 || !C2)
14030 return SDValue();
14031
14032 APInt C2Int = C2->getAPIntValue();
14033 APInt C1Int = C1ShlC2->getAPIntValue();
14034 unsigned C2Width = C2Int.getBitWidth();
14035 if (C2Int.uge(C2Width))
14036 return SDValue();
14037 uint64_t C2Value = C2Int.getZExtValue();
14038
14039 // Check that performing a lshr will not lose any information.
14040 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
14041 if ((C1Int & Mask) != C1Int)
14042 return SDValue();
14043
14044 // Shift the first constant.
14045 C1Int.lshrInPlace(C2Int);
14046
14047 // The immediates are encoded as an 8-bit value that can be rotated.
14048 auto LargeImm = [](const APInt &Imm) {
14049 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
14050 return Imm.getBitWidth() - Zeros > 8;
14051 };
14052
14053 if (LargeImm(C1Int) || LargeImm(C2Int))
14054 return SDValue();
14055
14056 SelectionDAG &DAG = DCI.DAG;
14057 SDLoc dl(N);
14058 SDValue X = SHL.getOperand(0);
14059 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14060 DAG.getConstant(C1Int, dl, MVT::i32));
14061 // Shift left to compensate for the lshr of C1Int.
14062 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14063
14064 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14065 SHL.dump(); N->dump());
14066 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14067 return Res;
14068}
14069
14070
14071/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14072///
14073static SDValue PerformADDCombine(SDNode *N,
14074 TargetLowering::DAGCombinerInfo &DCI,
14075 const ARMSubtarget *Subtarget) {
14076 SDValue N0 = N->getOperand(0);
14077 SDValue N1 = N->getOperand(1);
14078
14079 // Only works one way, because it needs an immediate operand.
14080 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14081 return Result;
14082
14083 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14084 return Result;
14085
14086 // First try with the default operand order.
14087 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14088 return Result;
14089
14090 // If that didn't work, try again with the operands commuted.
14091 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14092}
14093
14094// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14095// providing -X is as cheap as X (currently, just a constant).
14096static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14097 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14098 return SDValue();
14099 SDValue CSINC = N->getOperand(1);
14100 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14101 return SDValue();
14102
14103 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14104 if (!X)
14105 return SDValue();
14106
14107 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14108 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14109 CSINC.getOperand(0)),
14110 CSINC.getOperand(1), CSINC.getOperand(2),
14111 CSINC.getOperand(3));
14112}
14113
14114/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14115///
14116static SDValue PerformSUBCombine(SDNode *N,
14117 TargetLowering::DAGCombinerInfo &DCI,
14118 const ARMSubtarget *Subtarget) {
14119 SDValue N0 = N->getOperand(0);
14120 SDValue N1 = N->getOperand(1);
14121
14122 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14123 if (N1.getNode()->hasOneUse())
14124 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14125 return Result;
14126
14127 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14128 return R;
14129
14130 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14131 return SDValue();
14132
14133 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14134 // so that we can readily pattern match more mve instructions which can use
14135 // a scalar operand.
14136 SDValue VDup = N->getOperand(1);
14137 if (VDup->getOpcode() != ARMISD::VDUP)
14138 return SDValue();
14139
14140 SDValue VMov = N->getOperand(0);
14141 if (VMov->getOpcode() == ISD::BITCAST)
14142 VMov = VMov->getOperand(0);
14143
14144 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14145 return SDValue();
14146
14147 SDLoc dl(N);
14148 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14149 DCI.DAG.getConstant(0, dl, MVT::i32),
14150 VDup->getOperand(0));
14151 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14152}
14153
14154/// PerformVMULCombine
14155/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14156/// special multiplier accumulator forwarding.
14157/// vmul d3, d0, d2
14158/// vmla d3, d1, d2
14159/// is faster than
14160/// vadd d3, d0, d1
14161/// vmul d3, d3, d2
14162// However, for (A + B) * (A + B),
14163// vadd d2, d0, d1
14164// vmul d3, d0, d2
14165// vmla d3, d1, d2
14166// is slower than
14167// vadd d2, d0, d1
14168// vmul d3, d2, d2
14169static SDValue PerformVMULCombine(SDNode *N,
14170 TargetLowering::DAGCombinerInfo &DCI,
14171 const ARMSubtarget *Subtarget) {
14172 if (!Subtarget->hasVMLxForwarding())
14173 return SDValue();
14174
14175 SelectionDAG &DAG = DCI.DAG;
14176 SDValue N0 = N->getOperand(0);
14177 SDValue N1 = N->getOperand(1);
14178 unsigned Opcode = N0.getOpcode();
14179 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14180 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14181 Opcode = N1.getOpcode();
14182 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14183 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14184 return SDValue();
14185 std::swap(N0, N1);
14186 }
14187
14188 if (N0 == N1)
14189 return SDValue();
14190
14191 EVT VT = N->getValueType(0);
14192 SDLoc DL(N);
14193 SDValue N00 = N0->getOperand(0);
14194 SDValue N01 = N0->getOperand(1);
14195 return DAG.getNode(Opcode, DL, VT,
14196 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14197 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14198}
14199
14200static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14201 const ARMSubtarget *Subtarget) {
14202 EVT VT = N->getValueType(0);
14203 if (VT != MVT::v2i64)
14204 return SDValue();
14205
14206 SDValue N0 = N->getOperand(0);
14207 SDValue N1 = N->getOperand(1);
14208
14209 auto IsSignExt = [&](SDValue Op) {
14210 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14211 return SDValue();
14212 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14213 if (VT.getScalarSizeInBits() == 32)
14214 return Op->getOperand(0);
14215 return SDValue();
14216 };
14217 auto IsZeroExt = [&](SDValue Op) {
14218 // Zero extends are a little more awkward. At the point we are matching
14219 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14220 // That might be before or after a bitcast depending on how the and is
14221 // placed. Because this has to look through bitcasts, it is currently only
14222 // supported on LE.
14223 if (!Subtarget->isLittle())
14224 return SDValue();
14225
14226 SDValue And = Op;
14227 if (And->getOpcode() == ISD::BITCAST)
14228 And = And->getOperand(0);
14229 if (And->getOpcode() != ISD::AND)
14230 return SDValue();
14231 SDValue Mask = And->getOperand(1);
14232 if (Mask->getOpcode() == ISD::BITCAST)
14233 Mask = Mask->getOperand(0);
14234
14235 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14236 Mask.getValueType() != MVT::v4i32)
14237 return SDValue();
14238 if (isAllOnesConstant(Mask->getOperand(0)) &&
14239 isNullConstant(Mask->getOperand(1)) &&
14240 isAllOnesConstant(Mask->getOperand(2)) &&
14241 isNullConstant(Mask->getOperand(3)))
14242 return And->getOperand(0);
14243 return SDValue();
14244 };
14245
14246 SDLoc dl(N);
14247 if (SDValue Op0 = IsSignExt(N0)) {
14248 if (SDValue Op1 = IsSignExt(N1)) {
14249 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14250 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14251 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14252 }
14253 }
14254 if (SDValue Op0 = IsZeroExt(N0)) {
14255 if (SDValue Op1 = IsZeroExt(N1)) {
14256 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14257 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14258 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14259 }
14260 }
14261
14262 return SDValue();
14263}
14264
14265static SDValue PerformMULCombine(SDNode *N,
14266 TargetLowering::DAGCombinerInfo &DCI,
14267 const ARMSubtarget *Subtarget) {
14268 SelectionDAG &DAG = DCI.DAG;
14269
14270 EVT VT = N->getValueType(0);
14271 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14272 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14273
14274 if (Subtarget->isThumb1Only())
14275 return SDValue();
14276
14277 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14278 return SDValue();
14279
14280 if (VT.is64BitVector() || VT.is128BitVector())
14281 return PerformVMULCombine(N, DCI, Subtarget);
14282 if (VT != MVT::i32)
14283 return SDValue();
14284
14285 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14286 if (!C)
14287 return SDValue();
14288
14289 int64_t MulAmt = C->getSExtValue();
14290 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14291
14292 ShiftAmt = ShiftAmt & (32 - 1);
14293 SDValue V = N->getOperand(0);
14294 SDLoc DL(N);
14295
14296 SDValue Res;
14297 MulAmt >>= ShiftAmt;
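// For example, for (mul x, 20): ShiftAmt is 2 and MulAmt becomes 5 = 2^2 + 1,
// so we emit (shl (add x, (shl x, 2)), 2).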
14298
14299 if (MulAmt >= 0) {
14300 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14301 // (mul x, 2^N + 1) => (add (shl x, N), x)
14302 Res = DAG.getNode(ISD::ADD, DL, VT,
14303 V,
14304 DAG.getNode(ISD::SHL, DL, VT,
14305 V,
14306 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14307 MVT::i32)));
14308 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14309 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14310 Res = DAG.getNode(ISD::SUB, DL, VT,
14311 DAG.getNode(ISD::SHL, DL, VT,
14312 V,
14313 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14314 MVT::i32)),
14315 V);
14316 } else
14317 return SDValue();
14318 } else {
14319 uint64_t MulAmtAbs = -MulAmt;
14320 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14321 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14322 Res = DAG.getNode(ISD::SUB, DL, VT,
14323 V,
14324 DAG.getNode(ISD::SHL, DL, VT,
14325 V,
14326 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14327 MVT::i32)));
14328 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14329 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14330 Res = DAG.getNode(ISD::ADD, DL, VT,
14331 V,
14332 DAG.getNode(ISD::SHL, DL, VT,
14333 V,
14334 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14335 MVT::i32)));
14336 Res = DAG.getNode(ISD::SUB, DL, VT,
14337 DAG.getConstant(0, DL, MVT::i32), Res);
14338 } else
14339 return SDValue();
14340 }
14341
14342 if (ShiftAmt != 0)
14343 Res = DAG.getNode(ISD::SHL, DL, VT,
14344 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14345
14346 // Do not add new nodes to DAG combiner worklist.
14347 DCI.CombineTo(N, Res, false);
14348 return SDValue();
14349}
14350
14351static SDValue CombineANDShift(SDNode *N,
14352 TargetLowering::DAGCombinerInfo &DCI,
14353 const ARMSubtarget *Subtarget) {
14354 // Allow DAGCombine to pattern-match before we touch the canonical form.
14355 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14356 return SDValue();
14357
14358 if (N->getValueType(0) != MVT::i32)
14359 return SDValue();
14360
14361 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14362 if (!N1C)
14363 return SDValue();
14364
14365 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14366 // Don't transform uxtb/uxth.
14367 if (C1 == 255 || C1 == 65535)
14368 return SDValue();
14369
14370 SDNode *N0 = N->getOperand(0).getNode();
14371 if (!N0->hasOneUse())
14372 return SDValue();
14373
14374 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14375 return SDValue();
14376
14377 bool LeftShift = N0->getOpcode() == ISD::SHL;
14378
14379 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14380 if (!N01C)
14381 return SDValue();
14382
14383 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14384 if (!C2 || C2 >= 32)
14385 return SDValue();
14386
14387 // Clear irrelevant bits in the mask.
14388 if (LeftShift)
14389 C1 &= (-1U << C2);
14390 else
14391 C1 &= (-1U >> C2);
14392
14393 SelectionDAG &DAG = DCI.DAG;
14394 SDLoc DL(N);
14395
14396 // We have a pattern of the form "(and (shl x, c2) c1)" or
14397 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14398 // transform to a pair of shifts, to save materializing c1.
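// For example, (and (srl x, 3), 0x1F) becomes (srl (shl x, 24), 27), which
// needs no separate constant for the mask.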
14399
14400 // First pattern: right shift, then mask off leading bits.
14401 // FIXME: Use demanded bits?
14402 if (!LeftShift && isMask_32(C1)) {
14403 uint32_t C3 = llvm::countl_zero(C1);
14404 if (C2 < C3) {
14405 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14406 DAG.getConstant(C3 - C2, DL, MVT::i32));
14407 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14408 DAG.getConstant(C3, DL, MVT::i32));
14409 }
14410 }
14411
14412 // First pattern, reversed: left shift, then mask off trailing bits.
14413 if (LeftShift && isMask_32(~C1)) {
14414 uint32_t C3 = llvm::countr_zero(C1);
14415 if (C2 < C3) {
14416 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14417 DAG.getConstant(C3 - C2, DL, MVT::i32));
14418 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14419 DAG.getConstant(C3, DL, MVT::i32));
14420 }
14421 }
14422
14423 // Second pattern: left shift, then mask off leading bits.
14424 // FIXME: Use demanded bits?
14425 if (LeftShift && isShiftedMask_32(C1)) {
14426 uint32_t Trailing = llvm::countr_zero(C1);
14427 uint32_t C3 = llvm::countl_zero(C1);
14428 if (Trailing == C2 && C2 + C3 < 32) {
14429 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14430 DAG.getConstant(C2 + C3, DL, MVT::i32));
14431 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14432 DAG.getConstant(C3, DL, MVT::i32));
14433 }
14434 }
14435
14436 // Second pattern, reversed: right shift, then mask off trailing bits.
14437 // FIXME: Handle other patterns of known/demanded bits.
14438 if (!LeftShift && isShiftedMask_32(C1)) {
14439 uint32_t Leading = llvm::countl_zero(C1);
14440 uint32_t C3 = llvm::countr_zero(C1);
14441 if (Leading == C2 && C2 + C3 < 32) {
14442 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14443 DAG.getConstant(C2 + C3, DL, MVT::i32));
14444 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14445 DAG.getConstant(C3, DL, MVT::i32));
14446 }
14447 }
14448
14449 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14450 // if "c1 >> c2" is a cheaper immediate than "c1"
14451 if (LeftShift &&
14452 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14453
14454 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14455 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14456 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14457 DAG.getConstant(C2, DL, MVT::i32));
14458 }
14459
14460 return SDValue();
14461}
14462
14463static SDValue PerformANDCombine(SDNode *N,
14464 TargetLowering::DAGCombinerInfo &DCI,
14465 const ARMSubtarget *Subtarget) {
14466 // Attempt to use immediate-form VBIC
14467 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14468 SDLoc dl(N);
14469 EVT VT = N->getValueType(0);
14470 SelectionDAG &DAG = DCI.DAG;
14471
14472 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14473 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14474 return SDValue();
14475
14476 APInt SplatBits, SplatUndef;
14477 unsigned SplatBitSize;
14478 bool HasAnyUndefs;
14479 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14480 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14481 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14482 SplatBitSize == 64) {
14483 EVT VbicVT;
14484 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14485 SplatUndef.getZExtValue(), SplatBitSize,
14486 DAG, dl, VbicVT, VT, OtherModImm);
14487 if (Val.getNode()) {
14488 SDValue Input =
14489 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
14490 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14491 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
14492 }
14493 }
14494 }
14495
14496 if (!Subtarget->isThumb1Only()) {
14497 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14498 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14499 return Result;
14500
14501 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14502 return Result;
14503 }
14504
14505 if (Subtarget->isThumb1Only())
14506 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14507 return Result;
14508
14509 return SDValue();
14510}
14511
14512// Try combining OR nodes to SMULWB, SMULWT.
14513static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14514 TargetLowering::DAGCombinerInfo &DCI,
14515 const ARMSubtarget *Subtarget) {
14516 if (!Subtarget->hasV6Ops() ||
14517 (Subtarget->isThumb() &&
14518 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14519 return SDValue();
14520
14521 SDValue SRL = OR->getOperand(0);
14522 SDValue SHL = OR->getOperand(1);
14523
14524 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14525 SRL = OR->getOperand(1);
14526 SHL = OR->getOperand(0);
14527 }
14528 if (!isSRL16(SRL) || !isSHL16(SHL))
14529 return SDValue();
14530
14531 // The first operands to the shifts need to be the two results from the
14532 // same smul_lohi node.
14533 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14534 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14535 return SDValue();
14536
14537 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14538 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14539 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14540 return SDValue();
14541
14542 // Now we have:
14543 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14544 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14545 // For SMULWB the 16-bit value will be sign-extended somehow.
14546 // For SMULWT only the SRA is required.
14547 // Check both sides of SMUL_LOHI
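// SMULW[B|T] multiply the 32-bit operand by the bottom/top 16 bits of the
// other operand and keep the top 32 bits of the 48-bit product, i.e.
// (a * b16) >> 16, which is the same value the srl/shl/or sequence reassembles.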
14548 SDValue OpS16 = SMULLOHI->getOperand(0);
14549 SDValue OpS32 = SMULLOHI->getOperand(1);
14550
14551 SelectionDAG &DAG = DCI.DAG;
14552 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14553 OpS16 = OpS32;
14554 OpS32 = SMULLOHI->getOperand(0);
14555 }
14556
14557 SDLoc dl(OR);
14558 unsigned Opcode = 0;
14559 if (isS16(OpS16, DAG))
14560 Opcode = ARMISD::SMULWB;
14561 else if (isSRA16(OpS16)) {
14562 Opcode = ARMISD::SMULWT;
14563 OpS16 = OpS16->getOperand(0);
14564 }
14565 else
14566 return SDValue();
14567
14568 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14569 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14570 return SDValue(OR, 0);
14571}
14572
14573static SDValue PerformORCombineToBFI(SDNode *N,
14574 TargetLowering::DAGCombinerInfo &DCI,
14575 const ARMSubtarget *Subtarget) {
14576 // BFI is only available on V6T2+
14577 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14578 return SDValue();
14579
14580 EVT VT = N->getValueType(0);
14581 SDValue N0 = N->getOperand(0);
14582 SDValue N1 = N->getOperand(1);
14583 SelectionDAG &DAG = DCI.DAG;
14584 SDLoc DL(N);
14585 // 1) or (and A, mask), val => ARMbfi A, val, mask
14586 // iff (val & mask) == val
14587 //
14588 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14589 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14590 // && mask == ~mask2
14591 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14592 // && ~mask == mask2
14593 // (i.e., copy a bitfield value into another bitfield of the same width)
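// For example, case (1): (or (and A, 0xFFFF00FF), 0xAB00) becomes a BFI that
// inserts 0xAB into bits [15:8] of A.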
14594
14595 if (VT != MVT::i32)
14596 return SDValue();
14597
14598 SDValue N00 = N0.getOperand(0);
14599
14600 // The value and the mask need to be constants so we can verify this is
14601 // actually a bitfield set. If the mask is 0xffff, we can do better
14602 // via a movt instruction, so don't use BFI in that case.
14603 SDValue MaskOp = N0.getOperand(1);
14604 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14605 if (!MaskC)
14606 return SDValue();
14607 unsigned Mask = MaskC->getZExtValue();
14608 if (Mask == 0xffff)
14609 return SDValue();
14610 SDValue Res;
14611 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14612 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14613 if (N1C) {
14614 unsigned Val = N1C->getZExtValue();
14615 if ((Val & ~Mask) != Val)
14616 return SDValue();
14617
14618 if (ARM::isBitFieldInvertedMask(Mask)) {
14619 Val >>= llvm::countr_zero(~Mask);
14620
14621 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14622 DAG.getConstant(Val, DL, MVT::i32),
14623 DAG.getConstant(Mask, DL, MVT::i32));
14624
14625 DCI.CombineTo(N, Res, false);
14626 // Return value from the original node to inform the combiner that N is
14627 // now dead.
14628 return SDValue(N, 0);
14629 }
14630 } else if (N1.getOpcode() == ISD::AND) {
14631 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14632 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14633 if (!N11C)
14634 return SDValue();
14635 unsigned Mask2 = N11C->getZExtValue();
14636
14637 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14638 // as is to match.
14639 if (ARM::isBitFieldInvertedMask(Mask) &&
14640 (Mask == ~Mask2)) {
14641 // The pack halfword instruction works better for masks that fit it,
14642 // so use that when it's available.
14643 if (Subtarget->hasDSP() &&
14644 (Mask == 0xffff || Mask == 0xffff0000))
14645 return SDValue();
14646 // 2a
14647 unsigned amt = llvm::countr_zero(Mask2);
14648 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14649 DAG.getConstant(amt, DL, MVT::i32));
14650 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14651 DAG.getConstant(Mask, DL, MVT::i32));
14652 DCI.CombineTo(N, Res, false);
14653 // Return value from the original node to inform the combiner that N is
14654 // now dead.
14655 return SDValue(N, 0);
14656 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14657 (~Mask == Mask2)) {
14658 // The pack halfword instruction works better for masks that fit it,
14659 // so use that when it's available.
14660 if (Subtarget->hasDSP() &&
14661 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14662 return SDValue();
14663 // 2b
14664 unsigned lsb = llvm::countr_zero(Mask);
14665 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14666 DAG.getConstant(lsb, DL, MVT::i32));
14667 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14668 DAG.getConstant(Mask2, DL, MVT::i32));
14669 DCI.CombineTo(N, Res, false);
14670 // Return value from the original node to inform the combiner that N is
14671 // now dead.
14672 return SDValue(N, 0);
14673 }
14674 }
14675
14676 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14677 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14678 ARM::isBitFieldInvertedMask(~Mask)) {
14679 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14680 // where lsb(mask) == #shamt and masked bits of B are known zero.
14681 SDValue ShAmt = N00.getOperand(1);
14682 unsigned ShAmtC = ShAmt->getAsZExtVal();
14683 unsigned LSB = llvm::countr_zero(Mask);
14684 if (ShAmtC != LSB)
14685 return SDValue();
14686
14687 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14688 DAG.getConstant(~Mask, DL, MVT::i32));
14689
14690 DCI.CombineTo(N, Res, false);
14691 // Return value from the original node to inform the combiner that N is
14692 // now dead.
14693 return SDValue(N, 0);
14694 }
14695
14696 return SDValue();
14697}
14698
14699static bool isValidMVECond(unsigned CC, bool IsFloat) {
14700 switch (CC) {
14701 case ARMCC::EQ:
14702 case ARMCC::NE:
14703 case ARMCC::LE:
14704 case ARMCC::GT:
14705 case ARMCC::GE:
14706 case ARMCC::LT:
14707 return true;
14708 case ARMCC::HS:
14709 case ARMCC::HI:
14710 return !IsFloat;
14711 default:
14712 return false;
14713 };
14714}
14715
14717 if (N->getOpcode() == ARMISD::VCMP)
14718 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14719 else if (N->getOpcode() == ARMISD::VCMPZ)
14720 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14721 else
14722 llvm_unreachable("Not a VCMP/VCMPZ!");
14723}
14724
14727 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14728}
14729
14731 const ARMSubtarget *Subtarget) {
14732 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14733 // together with predicates
14734 EVT VT = N->getValueType(0);
14735 SDLoc DL(N);
14736 SDValue N0 = N->getOperand(0);
14737 SDValue N1 = N->getOperand(1);
14738
14739 auto IsFreelyInvertable = [&](SDValue V) {
14740 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14741 return CanInvertMVEVCMP(V);
14742 return false;
14743 };
14744
14745 // At least one operand must be freely invertible.
14746 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14747 return SDValue();
14748
14749 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14750 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14751 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14752 return DAG.getLogicalNOT(DL, And, VT);
14753}
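// Shape of the rewrite above, with illustrative operands:
//   (or (VCMP a, b, eq), (VCMPZ c, ne))
// becomes not(and(not(VCMP a, b, eq), not(VCMPZ c, ne))). Each inner NOT is an
// XOR with the all-true predicate, which PerformXORCombine below folds into a
// VCMP/VCMPZ with the opposite condition, leaving an AND of predicates under a
// single outer NOT.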
14754
14755/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14758 const ARMSubtarget *Subtarget) {
14759 // Attempt to use immediate-form VORR
14760 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14761 SDLoc dl(N);
14762 EVT VT = N->getValueType(0);
14763 SelectionDAG &DAG = DCI.DAG;
14764
14765 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14766 return SDValue();
14767
14768 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14769 VT == MVT::v8i1 || VT == MVT::v16i1))
14770 return PerformORCombine_i1(N, DAG, Subtarget);
14771
14772 APInt SplatBits, SplatUndef;
14773 unsigned SplatBitSize;
14774 bool HasAnyUndefs;
14775 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14776 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14777 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14778 SplatBitSize == 64) {
14779 EVT VorrVT;
14780 SDValue Val =
14781 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14782 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14783 if (Val.getNode()) {
14784 SDValue Input =
14785 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
14786 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14787 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
14788 }
14789 }
14790 }
14791
14792 if (!Subtarget->isThumb1Only()) {
14793 // fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
14794 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14795 return Result;
14796 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14797 return Result;
14798 }
14799
14800 SDValue N0 = N->getOperand(0);
14801 SDValue N1 = N->getOperand(1);
14802
14803 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14804 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14806
14807 // The code below optimizes (or (and X, Y), Z).
14808 // The AND operand needs to have a single user to make these optimizations
14809 // profitable.
14810 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14811 return SDValue();
14812
14813 APInt SplatUndef;
14814 unsigned SplatBitSize;
14815 bool HasAnyUndefs;
14816
14817 APInt SplatBits0, SplatBits1;
14818 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14819 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14820 // Ensure that the second operands of both ANDs are constants
14821 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14822 HasAnyUndefs) && !HasAnyUndefs) {
14823 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14824 HasAnyUndefs) && !HasAnyUndefs) {
14825 // Ensure that the bit widths of the constants are the same and that
14826 // the splat arguments are logical inverses as per the pattern we
14827 // are trying to simplify.
14828 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14829 SplatBits0 == ~SplatBits1) {
14830 // Canonicalize the vector type to make instruction selection
14831 // simpler.
14832 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14833 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14834 N0->getOperand(1),
14835 N0->getOperand(0),
14836 N1->getOperand(0));
14837 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14838 }
14839 }
14840 }
14841 }
14842
14843 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14844 // reasonable.
14845 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14846 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14847 return Res;
14848 }
14849
14850 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14851 return Result;
14852
14853 return SDValue();
14854}
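// Illustrative instance of the VBSP fold above, using complementary constant
// splat masks:
//   (or (and X, splat(0x00FF00FF)), (and Y, splat(0xFF00FF00)))
// selects the bytes covered by each mask from X and Y respectively, and is
// emitted as (VBSP splat(0x00FF00FF), X, Y) on the canonical v4i32/v2i32 type,
// followed by a VECTOR_REG_CAST back to the original vector type.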
14855
14858 const ARMSubtarget *Subtarget) {
14859 EVT VT = N->getValueType(0);
14860 SelectionDAG &DAG = DCI.DAG;
14861
14862 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14863 return SDValue();
14864
14865 if (!Subtarget->isThumb1Only()) {
14866 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
14867 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14868 return Result;
14869
14870 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14871 return Result;
14872 }
14873
14874 if (Subtarget->hasMVEIntegerOps()) {
14875 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14876 SDValue N0 = N->getOperand(0);
14877 SDValue N1 = N->getOperand(1);
14878 const TargetLowering *TLI = Subtarget->getTargetLowering();
14879 if (TLI->isConstTrueVal(N1) &&
14880 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14881 if (CanInvertMVEVCMP(N0)) {
14882 SDLoc DL(N0);
14884
14886 Ops.push_back(N0->getOperand(0));
14887 if (N0->getOpcode() == ARMISD::VCMP)
14888 Ops.push_back(N0->getOperand(1));
14889 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14890 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14891 }
14892 }
14893 }
14894
14895 return SDValue();
14896}
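// Example of the VCMP inversion above:
//   (xor (VCMP a, b, eq), true) -> (VCMP a, b, ne)
//   (xor (VCMPZ a, lt),   true) -> (VCMPZ a, ge)
// provided the inverted condition is still representable as an MVE VCMP
// (see isValidMVECond).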
14897
14898// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14899// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14900// their position in "to" (Rd).
14901static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14902 assert(N->getOpcode() == ARMISD::BFI);
14903
14904 SDValue From = N->getOperand(1);
14905 ToMask = ~N->getConstantOperandAPInt(2);
14906 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14907
14908 // If the 'from' value came from a SRL #C, the extracted bits really start at
14909 // bit #C of the SRL's operand, so shift FromMask up accordingly.
14910 if (From->getOpcode() == ISD::SRL &&
14911 isa<ConstantSDNode>(From->getOperand(1))) {
14912 APInt Shift = From->getConstantOperandAPInt(1);
14913 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14914 FromMask <<= Shift.getLimitedValue(31);
14915 From = From->getOperand(0);
14916 }
14917
14918 return From;
14919}
14920
14921 // If A and B each contain one contiguous set of bits, does A | B form their
14922 // clean concatenation (A sitting directly above B, with no gap or overlap)?
14923 // Neither A nor B may be zero.
14924static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14925 unsigned LastActiveBitInA = A.countr_zero();
14926 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14927 return LastActiveBitInA - 1 == FirstActiveBitInB;
14928}
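// For instance, with the illustrative values A = 0x000000F0 and B = 0x0000000F
// the lowest set bit of A is bit 4 and the highest set bit of B is bit 3, so
// 4 - 1 == 3 and the two masks concatenate cleanly into 0x000000FF. With
// A = 0x0000FF00 and B = 0x0000000F the check fails (gap between bits 8 and 3).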
14929
14931 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14932 APInt ToMask, FromMask;
14933 SDValue From = ParseBFI(N, ToMask, FromMask);
14934 SDValue To = N->getOperand(0);
14935
14936 SDValue V = To;
14937 if (V.getOpcode() != ARMISD::BFI)
14938 return SDValue();
14939
14940 APInt NewToMask, NewFromMask;
14941 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14942 if (NewFrom != From)
14943 return SDValue();
14944
14945 // Do the written bits conflict with any we've seen so far?
14946 if ((NewToMask & ToMask).getBoolValue())
14947 // Conflicting bits.
14948 return SDValue();
14949
14950 // Are the new bits contiguous when combined with the old bits?
14951 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14952 BitsProperlyConcatenate(FromMask, NewFromMask))
14953 return V;
14954 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14955 BitsProperlyConcatenate(NewFromMask, FromMask))
14956 return V;
14957
14958 return SDValue();
14959}
14960
14962 SDValue N0 = N->getOperand(0);
14963 SDValue N1 = N->getOperand(1);
14964
14965 if (N1.getOpcode() == ISD::AND) {
14966 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14967 // the bits being cleared by the AND are not demanded by the BFI.
14968 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14969 if (!N11C)
14970 return SDValue();
14971 unsigned InvMask = N->getConstantOperandVal(2);
14972 unsigned LSB = llvm::countr_zero(~InvMask);
14973 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14974 assert(Width <
14975 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14976 "undefined behavior");
14977 unsigned Mask = (1u << Width) - 1;
14978 unsigned Mask2 = N11C->getZExtValue();
14979 if ((Mask & (~Mask2)) == 0)
14980 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14981 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14982 return SDValue();
14983 }
14984
14985 // Look for another BFI to combine with.
14986 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14987 // We've found a BFI.
14988 APInt ToMask1, FromMask1;
14989 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14990
14991 APInt ToMask2, FromMask2;
14992 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14993 assert(From1 == From2);
14994 (void)From2;
14995
14996 // Create a new BFI, combining the two together.
14997 APInt NewFromMask = FromMask1 | FromMask2;
14998 APInt NewToMask = ToMask1 | ToMask2;
14999
15000 EVT VT = N->getValueType(0);
15001 SDLoc dl(N);
15002
15003 if (NewFromMask[0] == 0)
15004 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
15005 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
15006 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
15007 DAG.getConstant(~NewToMask, dl, VT));
15008 }
15009
15010 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
15011 // that lower bit insertions are performed first, provided that M1 and M2
15012 // do not overlap. This can allow multiple BFI instructions to be combined
15013 // together by the other folds above.
15014 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
15015 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
15016 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
15017
15018 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
15019 ToMask1.countl_zero() < ToMask2.countl_zero())
15020 return SDValue();
15021
15022 EVT VT = N->getValueType(0);
15023 SDLoc dl(N);
15024 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
15025 N->getOperand(1), N->getOperand(2));
15026 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
15027 N0.getOperand(2));
15028 }
15029
15030 return SDValue();
15031}
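// Worked example of the first fold in PerformBFICombine above, with
// illustrative constants: in (BFI A, (and B, 0x00000FFF), 0xFFFF00FF) the
// inserted field is bits [15:8] (Width = 8), so only the low 8 bits of the
// second operand are demanded; the AND with 0xFFF clears none of them and is
// dropped, giving (BFI A, B, 0xFFFF00FF).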
15032
15033 // Check that N is CMPZ(CSINC(0, 0, CC, X))
15034 // or CMPZ(CMOV(1, 0, CC, X));
15035 // if so, return X.
15037 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
15038 return SDValue();
15039 SDValue CSInc = Cmp->getOperand(0);
15040
15041 // Ignore any `And 1` nodes that may not yet have been removed. We are
15042 // looking for a value that produces 1/0, so these have no effect on the
15043 // code.
15044 while (CSInc.getOpcode() == ISD::AND &&
15045 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
15046 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15047 CSInc = CSInc.getOperand(0);
15048
15049 if (CSInc.getOpcode() == ARMISD::CSINC &&
15050 isNullConstant(CSInc.getOperand(0)) &&
15051 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15053 return CSInc.getOperand(3);
15054 }
15055 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15056 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15058 return CSInc.getOperand(3);
15059 }
15060 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15061 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15064 return CSInc.getOperand(3);
15065 }
15066 return SDValue();
15067}
15068
15070 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15071 // t92: flags = ARMISD::CMPZ t74, 0
15072 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15073 // t96: flags = ARMISD::CMPZ t93, 0
15074 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15076 if (SDValue C = IsCMPZCSINC(N, Cond))
15077 if (Cond == ARMCC::EQ)
15078 return C;
15079 return SDValue();
15080}
15081
15083 // Fold away an unnecessary CMPZ/CSINC
15084 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15085 // if C1==EQ -> CSXYZ A, B, C2, D
15086 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15088 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15089 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15090 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15091 N->getOperand(1),
15092 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15093 if (N->getConstantOperandVal(2) == ARMCC::NE)
15094 return DAG.getNode(
15095 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15096 N->getOperand(1),
15098 }
15099 return SDValue();
15100}
15101
15102/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15103/// ARMISD::VMOVRRD.
15106 const ARMSubtarget *Subtarget) {
15107 // vmovrrd(vmovdrr x, y) -> x,y
15108 SDValue InDouble = N->getOperand(0);
15109 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15110 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15111
15112 // vmovrrd(load f64) -> (load i32), (load i32)
15113 SDNode *InNode = InDouble.getNode();
15114 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15115 InNode->getValueType(0) == MVT::f64 &&
15116 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15117 !cast<LoadSDNode>(InNode)->isVolatile()) {
15118 // TODO: Should this be done for non-FrameIndex operands?
15119 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15120
15121 SelectionDAG &DAG = DCI.DAG;
15122 SDLoc DL(LD);
15123 SDValue BasePtr = LD->getBasePtr();
15124 SDValue NewLD1 =
15125 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15126 LD->getAlign(), LD->getMemOperand()->getFlags());
15127
15128 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15129 DAG.getConstant(4, DL, MVT::i32));
15130
15131 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15132 LD->getPointerInfo().getWithOffset(4),
15133 commonAlignment(LD->getAlign(), 4),
15134 LD->getMemOperand()->getFlags());
15135
15136 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15137 if (DCI.DAG.getDataLayout().isBigEndian())
15138 std::swap(NewLD1, NewLD2);
15139 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15140 return Result;
15141 }
15142
15143 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15144 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15145 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15146 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15147 SDValue BV = InDouble.getOperand(0);
15148 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15149 // change lane order under big endian.
15150 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15151 while (
15152 (BV.getOpcode() == ISD::BITCAST ||
15154 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15155 BVSwap = BV.getOpcode() == ISD::BITCAST;
15156 BV = BV.getOperand(0);
15157 }
15158 if (BV.getValueType() != MVT::v4i32)
15159 return SDValue();
15160
15161 // Handle buildvectors, pulling out the correct lane depending on
15162 // endianness.
15163 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15164 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15165 SDValue Op0 = BV.getOperand(Offset);
15166 SDValue Op1 = BV.getOperand(Offset + 1);
15167 if (!Subtarget->isLittle() && BVSwap)
15168 std::swap(Op0, Op1);
15169
15170 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15171 }
15172
15173 // A chain of insert_vectors, grabbing the correct value of the chain of
15174 // inserts.
15175 SDValue Op0, Op1;
15176 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15177 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15178 if (BV.getConstantOperandVal(2) == Offset && !Op0)
15179 Op0 = BV.getOperand(1);
15180 if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
15181 Op1 = BV.getOperand(1);
15182 }
15183 BV = BV.getOperand(0);
15184 }
15185 if (!Subtarget->isLittle() && BVSwap)
15186 std::swap(Op0, Op1);
15187 if (Op0 && Op1)
15188 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15189 }
15190
15191 return SDValue();
15192}
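// Example of the f64 load split above: for (VMOVRRD (load<f64> FrameIndex))
// the combine emits two i32 loads, one at the original address and one at
// offset +4, both using the original load's chain, and swaps the two results
// on big-endian targets so the GPR pair still receives the halves in the
// expected order.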
15193
15194/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15195/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15197 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15198 SDValue Op0 = N->getOperand(0);
15199 SDValue Op1 = N->getOperand(1);
15200 if (Op0.getOpcode() == ISD::BITCAST)
15201 Op0 = Op0.getOperand(0);
15202 if (Op1.getOpcode() == ISD::BITCAST)
15203 Op1 = Op1.getOperand(0);
15204 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15205 Op0.getNode() == Op1.getNode() &&
15206 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15207 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15208 N->getValueType(0), Op0.getOperand(0));
15209 return SDValue();
15210}
15211
15214 SDValue Op0 = N->getOperand(0);
15215
15216 // VMOVhr (VMOVrh (X)) -> X
15217 if (Op0->getOpcode() == ARMISD::VMOVrh)
15218 return Op0->getOperand(0);
15219
15220 // FullFP16: half values are passed in S-registers, and we don't
15221 // need any of the bitcasts and moves:
15222 //
15223 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15224 // t5: i32 = bitcast t2
15225 // t18: f16 = ARMISD::VMOVhr t5
15226 // =>
15227 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15228 if (Op0->getOpcode() == ISD::BITCAST) {
15229 SDValue Copy = Op0->getOperand(0);
15230 if (Copy.getValueType() == MVT::f32 &&
15231 Copy->getOpcode() == ISD::CopyFromReg) {
15232 bool HasGlue = Copy->getNumOperands() == 3;
15233 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15234 HasGlue ? Copy->getOperand(2) : SDValue()};
15235 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15236 SDValue NewCopy =
15238 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15239 ArrayRef(Ops, HasGlue ? 3 : 2));
15240
15241 // Update Users, Chains, and Potential Glue.
15242 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15243 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15244 if (HasGlue)
15245 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15246 NewCopy.getValue(2));
15247
15248 return NewCopy;
15249 }
15250 }
15251
15252 // fold (VMOVhr (load x)) -> (load (f16*)x)
15253 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15254 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15255 LN0->getMemoryVT() == MVT::i16) {
15256 SDValue Load =
15257 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15258 LN0->getBasePtr(), LN0->getMemOperand());
15259 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15260 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15261 return Load;
15262 }
15263 }
15264
15265 // Only the bottom 16 bits of the source register are used.
15266 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15267 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15268 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15269 return SDValue(N, 0);
15270
15271 return SDValue();
15272}
15273
15275 SDValue N0 = N->getOperand(0);
15276 EVT VT = N->getValueType(0);
15277
15278 // fold (VMOVrh (fpconst x)) -> const x
15279 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15280 APFloat V = C->getValueAPF();
15281 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15282 }
15283
15284 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15285 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15286 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15287
15288 SDValue Load =
15289 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15290 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15291 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15292 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15293 return Load;
15294 }
15295
15296 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15297 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15298 isa<ConstantSDNode>(N0->getOperand(1)))
15299 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15300 N0->getOperand(1));
15301
15302 return SDValue();
15303}
15304
15305/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15306/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15307/// i64 vector to have f64 elements, since the value can then be loaded
15308/// directly into a VFP register.
15310 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15311 for (unsigned i = 0; i < NumElts; ++i) {
15312 SDNode *Elt = N->getOperand(i).getNode();
15313 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15314 return true;
15315 }
15316 return false;
15317}
15318
15319/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15320/// ISD::BUILD_VECTOR.
15323 const ARMSubtarget *Subtarget) {
15324 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15325 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15326 // into a pair of GPRs, which is fine when the value is used as a scalar,
15327 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15328 SelectionDAG &DAG = DCI.DAG;
15329 if (N->getNumOperands() == 2)
15330 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15331 return RV;
15332
15333 // Load i64 elements as f64 values so that type legalization does not split
15334 // them up into i32 values.
15335 EVT VT = N->getValueType(0);
15336 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15337 return SDValue();
15338 SDLoc dl(N);
15340 unsigned NumElts = VT.getVectorNumElements();
15341 for (unsigned i = 0; i < NumElts; ++i) {
15342 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15343 Ops.push_back(V);
15344 // Make the DAGCombiner fold the bitcast.
15345 DCI.AddToWorklist(V.getNode());
15346 }
15347 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15348 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15349 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15350}
15351
15352/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15353static SDValue
15355 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15356 // At that time, we may have inserted bitcasts from integer to float.
15357 // If these bitcasts have survived DAGCombine, change the lowering of this
15358 // BUILD_VECTOR in something more vector friendly, i.e., that does not
15359 // force to use floating point types.
15360
15361 // Make sure we can change the type of the vector.
15362 // This is possible iff:
15363 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15364 // 1.1. Vector is used only once.
15365 // 1.2. Use is a bit convert to an integer type.
15366 // 2. The size of its operands are 32-bits (64-bits are not legal).
15367 EVT VT = N->getValueType(0);
15368 EVT EltVT = VT.getVectorElementType();
15369
15370 // Check 1.1. and 2.
15371 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15372 return SDValue();
15373
15374 // By construction, the input type must be float.
15375 assert(EltVT == MVT::f32 && "Unexpected type!");
15376
15377 // Check 1.2.
15378 SDNode *Use = *N->user_begin();
15379 if (Use->getOpcode() != ISD::BITCAST ||
15380 Use->getValueType(0).isFloatingPoint())
15381 return SDValue();
15382
15383 // Check profitability.
15384 // The model is: if more than half of the relevant operands are bitcast from
15385 // i32, turn the build_vector into a sequence of insert_vector_elt.
15386 // Relevant operands are all operands that are not statically
15387 // (i.e., at compile time) bitcast.
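// For example, in a 4-element ARMISD::BUILD_VECTOR where two operands are
// (bitcast i32 to f32), one is a constant and one is a plain f32 value, there
// are three relevant operands and two of them are bitcasts, so the node is
// rewritten below onto an i32 vector type.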
15388 unsigned NumOfBitCastedElts = 0;
15389 unsigned NumElts = VT.getVectorNumElements();
15390 unsigned NumOfRelevantElts = NumElts;
15391 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15392 SDValue Elt = N->getOperand(Idx);
15393 if (Elt->getOpcode() == ISD::BITCAST) {
15394 // Assume only bit cast to i32 will go away.
15395 if (Elt->getOperand(0).getValueType() == MVT::i32)
15396 ++NumOfBitCastedElts;
15397 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15398 // Constants are statically cast, thus do not count them as
15399 // relevant operands.
15400 --NumOfRelevantElts;
15401 }
15402
15403 // Check if more than half of the elements require a non-free bitcast.
15404 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15405 return SDValue();
15406
15407 SelectionDAG &DAG = DCI.DAG;
15408 // Create the new vector type.
15409 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15410 // Check if the type is legal.
15411 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15412 if (!TLI.isTypeLegal(VecVT))
15413 return SDValue();
15414
15415 // Combine:
15416 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15417 // => BITCAST INSERT_VECTOR_ELT
15418 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15419 // (BITCAST EN), N.
15420 SDValue Vec = DAG.getUNDEF(VecVT);
15421 SDLoc dl(N);
15422 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15423 SDValue V = N->getOperand(Idx);
15424 if (V.isUndef())
15425 continue;
15426 if (V.getOpcode() == ISD::BITCAST &&
15427 V->getOperand(0).getValueType() == MVT::i32)
15428 // Fold obvious case.
15429 V = V.getOperand(0);
15430 else {
15431 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15432 // Make the DAGCombiner fold the bitcasts.
15433 DCI.AddToWorklist(V.getNode());
15434 }
15435 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15436 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15437 }
15438 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15439 // Make the DAGCombiner fold the bitcasts.
15440 DCI.AddToWorklist(Vec.getNode());
15441 return Vec;
15442}
15443
15444static SDValue
15446 EVT VT = N->getValueType(0);
15447 SDValue Op = N->getOperand(0);
15448 SDLoc dl(N);
15449
15450 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15451 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15452 // If the valuetypes are the same, we can remove the cast entirely.
15453 if (Op->getOperand(0).getValueType() == VT)
15454 return Op->getOperand(0);
15455 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15456 }
15457
15458 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15459 // more VPNOT which might get folded as else predicates.
15460 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15461 SDValue X =
15462 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15464 DCI.DAG.getConstant(65535, dl, MVT::i32));
15465 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15466 }
15467
15468 // Only the bottom 16 bits of the source register are used.
15469 if (Op.getValueType() == MVT::i32) {
15470 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15471 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15472 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15473 return SDValue(N, 0);
15474 }
15475 return SDValue();
15476}
15477
15479 const ARMSubtarget *ST) {
15480 EVT VT = N->getValueType(0);
15481 SDValue Op = N->getOperand(0);
15482 SDLoc dl(N);
15483
15484 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15485 if (ST->isLittle())
15486 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15487
15488 // VT VECTOR_REG_CAST (VT Op) -> Op
15489 if (Op.getValueType() == VT)
15490 return Op;
15491 // VECTOR_REG_CAST undef -> undef
15492 if (Op.isUndef())
15493 return DAG.getUNDEF(VT);
15494
15495 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15496 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15497 // If the valuetypes are the same, we can remove the cast entirely.
15498 if (Op->getOperand(0).getValueType() == VT)
15499 return Op->getOperand(0);
15500 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15501 }
15502
15503 return SDValue();
15504}
15505
15507 const ARMSubtarget *Subtarget) {
15508 if (!Subtarget->hasMVEIntegerOps())
15509 return SDValue();
15510
15511 EVT VT = N->getValueType(0);
15512 SDValue Op0 = N->getOperand(0);
15513 SDValue Op1 = N->getOperand(1);
15514 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15515 SDLoc dl(N);
15516
15517 // vcmp X, 0, cc -> vcmpz X, cc
15518 if (isZeroVector(Op1))
15519 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15520
15521 unsigned SwappedCond = getSwappedCondition(Cond);
15522 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15523 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15524 if (isZeroVector(Op0))
15525 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15526 DAG.getConstant(SwappedCond, dl, MVT::i32));
15527 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15528 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15529 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15530 DAG.getConstant(SwappedCond, dl, MVT::i32));
15531 }
15532
15533 return SDValue();
15534}
15535
15536/// PerformInsertEltCombine - Target-specific dag combine xforms for
15537/// ISD::INSERT_VECTOR_ELT.
15540 // Bitcast an i64 load inserted into a vector to f64.
15541 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15542 EVT VT = N->getValueType(0);
15543 SDNode *Elt = N->getOperand(1).getNode();
15544 if (VT.getVectorElementType() != MVT::i64 ||
15545 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15546 return SDValue();
15547
15548 SelectionDAG &DAG = DCI.DAG;
15549 SDLoc dl(N);
15550 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15552 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15553 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15554 // Make the DAGCombiner fold the bitcasts.
15555 DCI.AddToWorklist(Vec.getNode());
15556 DCI.AddToWorklist(V.getNode());
15557 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15558 Vec, V, N->getOperand(2));
15559 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15560}
15561
15562// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15563// directly or bitcast to an integer if the original is a float vector.
15564// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15565// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15566static SDValue
15568 EVT VT = N->getValueType(0);
15569 SDLoc dl(N);
15570
15571 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15572 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15573 return SDValue();
15574
15575 SDValue Ext = SDValue(N, 0);
15576 if (Ext.getOpcode() == ISD::BITCAST &&
15577 Ext.getOperand(0).getValueType() == MVT::f32)
15578 Ext = Ext.getOperand(0);
15579 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15580 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15581 Ext.getConstantOperandVal(1) % 2 != 0)
15582 return SDValue();
15583 if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||
15584 Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))
15585 return SDValue();
15586
15587 SDValue Op0 = Ext.getOperand(0);
15588 EVT VecVT = Op0.getValueType();
15589 unsigned ResNo = Op0.getResNo();
15590 unsigned Lane = Ext.getConstantOperandVal(1);
15591 if (VecVT.getVectorNumElements() != 4)
15592 return SDValue();
15593
15594 // Find another extract, of Lane + 1
15595 auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {
15596 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15597 isa<ConstantSDNode>(V->getOperand(1)) &&
15598 V->getConstantOperandVal(1) == Lane + 1 &&
15599 V->getOperand(0).getResNo() == ResNo;
15600 });
15601 if (OtherIt == Op0->users().end())
15602 return SDValue();
15603
15604 // For float extracts, both extracted lanes need to be converted to an i32
15605 // value for the combine to apply.
15606 SDValue OtherExt(*OtherIt, 0);
15607 if (OtherExt.getValueType() != MVT::i32) {
15608 if (!OtherExt->hasOneUse() ||
15609 OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||
15610 OtherExt->user_begin()->getValueType(0) != MVT::i32)
15611 return SDValue();
15612 OtherExt = SDValue(*OtherExt->user_begin(), 0);
15613 }
15614
15615 // Convert the type to a f64 and extract with a VMOVRRD.
15616 SDValue F64 = DCI.DAG.getNode(
15617 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15618 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15619 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15620 SDValue VMOVRRD =
15621 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15622
15623 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15624 return VMOVRRD;
15625}
15626
15629 const ARMSubtarget *ST) {
15630 SDValue Op0 = N->getOperand(0);
15631 EVT VT = N->getValueType(0);
15632 SDLoc dl(N);
15633
15634 // extract (vdup x) -> x
15635 if (Op0->getOpcode() == ARMISD::VDUP) {
15636 SDValue X = Op0->getOperand(0);
15637 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15638 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15639 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15640 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15641 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15642 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15643
15644 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15645 X = X->getOperand(0);
15646 if (X.getValueType() == VT)
15647 return X;
15648 }
15649
15650 // extract ARM_BUILD_VECTOR -> x
15651 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15652 isa<ConstantSDNode>(N->getOperand(1)) &&
15653 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15654 return Op0.getOperand(N->getConstantOperandVal(1));
15655 }
15656
15657 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15658 if (Op0.getValueType() == MVT::v4i32 &&
15659 isa<ConstantSDNode>(N->getOperand(1)) &&
15660 Op0.getOpcode() == ISD::BITCAST &&
15662 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15663 SDValue BV = Op0.getOperand(0);
15664 unsigned Offset = N->getConstantOperandVal(1);
15665 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15666 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15667 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15668 }
15669
15670 // extract x, n; extract x, n+1 -> VMOVRRD x
15671 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15672 return R;
15673
15674 // extract (MVETrunc(x)) -> extract x
15675 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15676 unsigned Idx = N->getConstantOperandVal(1);
15677 unsigned Vec =
15679 unsigned SubIdx =
15681 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15682 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15683 }
15684
15685 return SDValue();
15686}
15687
15689 SDValue Op = N->getOperand(0);
15690 EVT VT = N->getValueType(0);
15691
15692 // sext_inreg(VGETLANEu) -> VGETLANEs
15693 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15694 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15695 Op.getOperand(0).getValueType().getScalarType())
15696 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15697 Op.getOperand(1));
15698
15699 return SDValue();
15700}
15701
15702static SDValue
15704 SDValue Vec = N->getOperand(0);
15705 SDValue SubVec = N->getOperand(1);
15706 uint64_t IdxVal = N->getConstantOperandVal(2);
15707 EVT VecVT = Vec.getValueType();
15708 EVT SubVT = SubVec.getValueType();
15709
15710 // Only do this for legal fixed vector types.
15711 if (!VecVT.isFixedLengthVector() ||
15712 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15714 return SDValue();
15715
15716 // Ignore widening patterns.
15717 if (IdxVal == 0 && Vec.isUndef())
15718 return SDValue();
15719
15720 // Subvector must be half the width and an "aligned" insertion.
15721 unsigned NumSubElts = SubVT.getVectorNumElements();
15722 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15723 (IdxVal != 0 && IdxVal != NumSubElts))
15724 return SDValue();
15725
15726 // Fold insert_subvector -> concat_vectors
15727 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15728 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15729 SDLoc DL(N);
15730 SDValue Lo, Hi;
15731 if (IdxVal == 0) {
15732 Lo = SubVec;
15733 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15734 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15735 } else {
15736 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15737 DCI.DAG.getVectorIdxConstant(0, DL));
15738 Hi = SubVec;
15739 }
15740 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15741}
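// Example with illustrative types: inserting a v4i16 subvector into a v8i16
// vector at index 4 becomes
//   (concat_vectors (extract_subvector Vec, 0), Sub)
// and an insertion at index 0 becomes
//   (concat_vectors Sub, (extract_subvector Vec, 4)).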
15742
15743// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15745 SelectionDAG &DAG) {
15746 SDValue Trunc = N->getOperand(0);
15747 EVT VT = Trunc.getValueType();
15748 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15749 return SDValue();
15750
15751 SDLoc DL(Trunc);
15752 if (isVMOVNTruncMask(N->getMask(), VT, false))
15753 return DAG.getNode(
15754 ARMISD::VMOVN, DL, VT,
15755 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15756 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15757 DAG.getConstant(1, DL, MVT::i32));
15758 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15759 return DAG.getNode(
15760 ARMISD::VMOVN, DL, VT,
15761 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15762 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15763 DAG.getConstant(1, DL, MVT::i32));
15764 return SDValue();
15765}
15766
15767/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15768/// ISD::VECTOR_SHUFFLE.
15770 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15771 return R;
15772
15773 // The LLVM shufflevector instruction does not require the shuffle mask
15774 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15775 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15776 // operands do not match the mask length, they are extended by concatenating
15777 // them with undef vectors. That is probably the right thing for other
15778 // targets, but for NEON it is better to concatenate two double-register
15779 // size vector operands into a single quad-register size vector. Do that
15780 // transformation here:
15781 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15782 // shuffle(concat(v1, v2), undef)
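// For example, with v8i16 operands the mask <0,8,1,9,2,10,3,11> over
// concat(v1, undef) and concat(v2, undef) is rewritten as the mask
// <0,4,1,5,2,6,3,7> over the single operand concat(v1, v2); any lane that
// referred to one of the undef halves becomes -1.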
15783 SDValue Op0 = N->getOperand(0);
15784 SDValue Op1 = N->getOperand(1);
15785 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15786 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15787 Op0.getNumOperands() != 2 ||
15788 Op1.getNumOperands() != 2)
15789 return SDValue();
15790 SDValue Concat0Op1 = Op0.getOperand(1);
15791 SDValue Concat1Op1 = Op1.getOperand(1);
15792 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15793 return SDValue();
15794 // Skip the transformation if any of the types are illegal.
15795 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15796 EVT VT = N->getValueType(0);
15797 if (!TLI.isTypeLegal(VT) ||
15798 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15799 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15800 return SDValue();
15801
15802 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15803 Op0.getOperand(0), Op1.getOperand(0));
15804 // Translate the shuffle mask.
15805 SmallVector<int, 16> NewMask;
15806 unsigned NumElts = VT.getVectorNumElements();
15807 unsigned HalfElts = NumElts/2;
15808 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15809 for (unsigned n = 0; n < NumElts; ++n) {
15810 int MaskElt = SVN->getMaskElt(n);
15811 int NewElt = -1;
15812 if (MaskElt < (int)HalfElts)
15813 NewElt = MaskElt;
15814 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15815 NewElt = HalfElts + MaskElt - NumElts;
15816 NewMask.push_back(NewElt);
15817 }
15818 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15819 DAG.getUNDEF(VT), NewMask);
15820}
15821
15822/// Load/store instruction that can be merged with a base address
15823/// update
15828 unsigned AddrOpIdx;
15829};
15830
15832 /// Instruction that updates a pointer
15834 /// Pointer increment operand
15836 /// Pointer increment value if it is a constant, or 0 otherwise
15837 unsigned ConstInc;
15838};
15839
15841 struct BaseUpdateUser &User,
15842 bool SimpleConstIncOnly,
15844 SelectionDAG &DAG = DCI.DAG;
15845 SDNode *N = Target.N;
15846 MemSDNode *MemN = cast<MemSDNode>(N);
15847 SDLoc dl(N);
15848
15849 // Find the new opcode for the updating load/store.
15850 bool isLoadOp = true;
15851 bool isLaneOp = false;
15852 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15853 // as an operand.
15854 bool hasAlignment = true;
15855 unsigned NewOpc = 0;
15856 unsigned NumVecs = 0;
15857 if (Target.isIntrinsic) {
15858 unsigned IntNo = N->getConstantOperandVal(1);
15859 switch (IntNo) {
15860 default:
15861 llvm_unreachable("unexpected intrinsic for Neon base update");
15862 case Intrinsic::arm_neon_vld1:
15863 NewOpc = ARMISD::VLD1_UPD;
15864 NumVecs = 1;
15865 break;
15866 case Intrinsic::arm_neon_vld2:
15867 NewOpc = ARMISD::VLD2_UPD;
15868 NumVecs = 2;
15869 break;
15870 case Intrinsic::arm_neon_vld3:
15871 NewOpc = ARMISD::VLD3_UPD;
15872 NumVecs = 3;
15873 break;
15874 case Intrinsic::arm_neon_vld4:
15875 NewOpc = ARMISD::VLD4_UPD;
15876 NumVecs = 4;
15877 break;
15878 case Intrinsic::arm_neon_vld1x2:
15879 NewOpc = ARMISD::VLD1x2_UPD;
15880 NumVecs = 2;
15881 hasAlignment = false;
15882 break;
15883 case Intrinsic::arm_neon_vld1x3:
15884 NewOpc = ARMISD::VLD1x3_UPD;
15885 NumVecs = 3;
15886 hasAlignment = false;
15887 break;
15888 case Intrinsic::arm_neon_vld1x4:
15889 NewOpc = ARMISD::VLD1x4_UPD;
15890 NumVecs = 4;
15891 hasAlignment = false;
15892 break;
15893 case Intrinsic::arm_neon_vld2dup:
15894 NewOpc = ARMISD::VLD2DUP_UPD;
15895 NumVecs = 2;
15896 break;
15897 case Intrinsic::arm_neon_vld3dup:
15898 NewOpc = ARMISD::VLD3DUP_UPD;
15899 NumVecs = 3;
15900 break;
15901 case Intrinsic::arm_neon_vld4dup:
15902 NewOpc = ARMISD::VLD4DUP_UPD;
15903 NumVecs = 4;
15904 break;
15905 case Intrinsic::arm_neon_vld2lane:
15906 NewOpc = ARMISD::VLD2LN_UPD;
15907 NumVecs = 2;
15908 isLaneOp = true;
15909 break;
15910 case Intrinsic::arm_neon_vld3lane:
15911 NewOpc = ARMISD::VLD3LN_UPD;
15912 NumVecs = 3;
15913 isLaneOp = true;
15914 break;
15915 case Intrinsic::arm_neon_vld4lane:
15916 NewOpc = ARMISD::VLD4LN_UPD;
15917 NumVecs = 4;
15918 isLaneOp = true;
15919 break;
15920 case Intrinsic::arm_neon_vst1:
15921 NewOpc = ARMISD::VST1_UPD;
15922 NumVecs = 1;
15923 isLoadOp = false;
15924 break;
15925 case Intrinsic::arm_neon_vst2:
15926 NewOpc = ARMISD::VST2_UPD;
15927 NumVecs = 2;
15928 isLoadOp = false;
15929 break;
15930 case Intrinsic::arm_neon_vst3:
15931 NewOpc = ARMISD::VST3_UPD;
15932 NumVecs = 3;
15933 isLoadOp = false;
15934 break;
15935 case Intrinsic::arm_neon_vst4:
15936 NewOpc = ARMISD::VST4_UPD;
15937 NumVecs = 4;
15938 isLoadOp = false;
15939 break;
15940 case Intrinsic::arm_neon_vst2lane:
15941 NewOpc = ARMISD::VST2LN_UPD;
15942 NumVecs = 2;
15943 isLoadOp = false;
15944 isLaneOp = true;
15945 break;
15946 case Intrinsic::arm_neon_vst3lane:
15947 NewOpc = ARMISD::VST3LN_UPD;
15948 NumVecs = 3;
15949 isLoadOp = false;
15950 isLaneOp = true;
15951 break;
15952 case Intrinsic::arm_neon_vst4lane:
15953 NewOpc = ARMISD::VST4LN_UPD;
15954 NumVecs = 4;
15955 isLoadOp = false;
15956 isLaneOp = true;
15957 break;
15958 case Intrinsic::arm_neon_vst1x2:
15959 NewOpc = ARMISD::VST1x2_UPD;
15960 NumVecs = 2;
15961 isLoadOp = false;
15962 hasAlignment = false;
15963 break;
15964 case Intrinsic::arm_neon_vst1x3:
15965 NewOpc = ARMISD::VST1x3_UPD;
15966 NumVecs = 3;
15967 isLoadOp = false;
15968 hasAlignment = false;
15969 break;
15970 case Intrinsic::arm_neon_vst1x4:
15971 NewOpc = ARMISD::VST1x4_UPD;
15972 NumVecs = 4;
15973 isLoadOp = false;
15974 hasAlignment = false;
15975 break;
15976 }
15977 } else {
15978 isLaneOp = true;
15979 switch (N->getOpcode()) {
15980 default:
15981 llvm_unreachable("unexpected opcode for Neon base update");
15982 case ARMISD::VLD1DUP:
15983 NewOpc = ARMISD::VLD1DUP_UPD;
15984 NumVecs = 1;
15985 break;
15986 case ARMISD::VLD2DUP:
15987 NewOpc = ARMISD::VLD2DUP_UPD;
15988 NumVecs = 2;
15989 break;
15990 case ARMISD::VLD3DUP:
15991 NewOpc = ARMISD::VLD3DUP_UPD;
15992 NumVecs = 3;
15993 break;
15994 case ARMISD::VLD4DUP:
15995 NewOpc = ARMISD::VLD4DUP_UPD;
15996 NumVecs = 4;
15997 break;
15998 case ISD::LOAD:
15999 NewOpc = ARMISD::VLD1_UPD;
16000 NumVecs = 1;
16001 isLaneOp = false;
16002 break;
16003 case ISD::STORE:
16004 NewOpc = ARMISD::VST1_UPD;
16005 NumVecs = 1;
16006 isLaneOp = false;
16007 isLoadOp = false;
16008 break;
16009 }
16010 }
16011
16012 // Find the size of memory referenced by the load/store.
16013 EVT VecTy;
16014 if (isLoadOp) {
16015 VecTy = N->getValueType(0);
16016 } else if (Target.isIntrinsic) {
16017 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
16018 } else {
16019 assert(Target.isStore &&
16020 "Node has to be a load, a store, or an intrinsic!");
16021 VecTy = N->getOperand(1).getValueType();
16022 }
16023
16024 bool isVLDDUPOp =
16025 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
16026 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
16027
16028 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16029 if (isLaneOp || isVLDDUPOp)
16030 NumBytes /= VecTy.getVectorNumElements();
16031
16032 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
16033 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
16034 // separate instructions that make it harder to use a non-constant update.
16035 return false;
16036 }
16037
16038 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
16039 return false;
16040
16041 // OK, we found an ADD we can fold into the base update.
16042 // Now, create a _UPD node, taking care of not breaking alignment.
16043
16044 EVT AlignedVecTy = VecTy;
16045 Align Alignment = MemN->getAlign();
16046
16047 // If this is a less-than-standard-aligned load/store, change the type to
16048 // match the standard alignment.
16049 // The alignment is overlooked when selecting _UPD variants; and it's
16050 // easier to introduce bitcasts here than fix that.
16051 // There are 3 ways to get to this base-update combine:
16052 // - intrinsics: they are assumed to be properly aligned (to the standard
16053 // alignment of the memory type), so we don't need to do anything.
16054 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16055 // intrinsics, so, likewise, there's nothing to do.
16056 // - generic load/store instructions: the alignment is specified as an
16057 // explicit operand, rather than implicitly as the standard alignment
16058 // of the memory type (like the intrinsics). We need to change the
16059 // memory type to match the explicit alignment. That way, we don't
16060 // generate non-standard-aligned ARMISD::VLDx nodes.
16061 if (isa<LSBaseSDNode>(N)) {
16062 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16063 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16064 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16065 assert(!isLaneOp && "Unexpected generic load/store lane.");
16066 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16067 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16068 }
16069 // Don't set an explicit alignment on regular load/stores that we want
16070 // to transform to VLD/VST 1_UPD nodes.
16071 // This matches the behavior of regular load/stores, which only get an
16072 // explicit alignment if the MMO alignment is larger than the standard
16073 // alignment of the memory type.
16074 // Intrinsics, however, always get an explicit alignment, set to the
16075 // alignment of the MMO.
16076 Alignment = Align(1);
16077 }
16078
16079 // Create the new updating load/store node.
16080 // First, create an SDVTList for the new updating node's results.
16081 EVT Tys[6];
16082 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16083 unsigned n;
16084 for (n = 0; n < NumResultVecs; ++n)
16085 Tys[n] = AlignedVecTy;
16086 Tys[n++] = MVT::i32;
16087 Tys[n] = MVT::Other;
16088 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16089
16090 // Then, gather the new node's operands.
16092 Ops.push_back(N->getOperand(0)); // incoming chain
16093 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16094 Ops.push_back(User.Inc);
16095
16096 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16097 // Try to match the intrinsic's signature
16098 Ops.push_back(StN->getValue());
16099 } else {
16100 // Loads (and of course intrinsics) match the intrinsics' signature,
16101 // so just add all but the alignment operand.
16102 unsigned LastOperand =
16103 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16104 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16105 Ops.push_back(N->getOperand(i));
16106 }
16107
16108 // For all node types, the alignment operand is always the last one.
16109 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16110
16111 // If this is a non-standard-aligned STORE, the penultimate operand is the
16112 // stored value. Bitcast it to the aligned type.
16113 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16114 SDValue &StVal = Ops[Ops.size() - 2];
16115 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16116 }
16117
16118 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16119 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16120 MemN->getMemOperand());
16121
16122 // Update the uses.
16123 SmallVector<SDValue, 5> NewResults;
16124 for (unsigned i = 0; i < NumResultVecs; ++i)
16125 NewResults.push_back(SDValue(UpdN.getNode(), i));
16126
16127 // If this is a non-standard-aligned LOAD, the first result is the loaded
16128 // value. Bitcast it to the expected result type.
16129 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16130 SDValue &LdVal = NewResults[0];
16131 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16132 }
16133
16134 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16135 DCI.CombineTo(N, NewResults);
16136 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16137
16138 return true;
16139}
16140
16141 // If (opcode ptr inc) is an ADD-like instruction, return the
16142// increment value. Otherwise return 0.
16143static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16144 SDValue Inc, const SelectionDAG &DAG) {
16145 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16146 if (!CInc)
16147 return 0;
16148
16149 switch (Opcode) {
16150 case ARMISD::VLD1_UPD:
16151 case ISD::ADD:
16152 return CInc->getZExtValue();
16153 case ISD::OR: {
16154 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16155 // (OR ptr inc) is the same as (ADD ptr inc)
16156 return CInc->getZExtValue();
16157 }
16158 return 0;
16159 }
16160 default:
16161 return 0;
16162 }
16163}
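// Example for the OR case above: if the pointer is known to be 16-byte aligned
// and the increment is 8, (or ptr, 8) only sets bits that are known to be zero
// in ptr, so haveNoCommonBitsSet succeeds and the OR is treated as
// (add ptr, 8) by the base-update combine.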
16164
16166 switch (N->getOpcode()) {
16167 case ISD::ADD:
16168 case ISD::OR: {
16169 if (isa<ConstantSDNode>(N->getOperand(1))) {
16170 *Ptr = N->getOperand(0);
16171 *CInc = N->getOperand(1);
16172 return true;
16173 }
16174 return false;
16175 }
16176 case ARMISD::VLD1_UPD: {
16177 if (isa<ConstantSDNode>(N->getOperand(2))) {
16178 *Ptr = N->getOperand(1);
16179 *CInc = N->getOperand(2);
16180 return true;
16181 }
16182 return false;
16183 }
16184 default:
16185 return false;
16186 }
16187}
16188
16190 // Check that the add is independent of the load/store.
16191 // Otherwise, folding it would create a cycle. Search through Addr
16192 // as well, since the User may not be a direct user of Addr and may
16193 // only share a base pointer.
16196 Worklist.push_back(N);
16197 Worklist.push_back(User);
16198 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16199 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16200 return false;
16201 return true;
16202}
16203
16204/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16205/// NEON load/store intrinsics, and generic vector load/stores, to merge
16206/// base address updates.
16207/// For generic load/stores, the memory type is assumed to be a vector.
16208/// The caller is assumed to have checked legality.
16211 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16212 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16213 const bool isStore = N->getOpcode() == ISD::STORE;
16214 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16215 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16216
16217 SDValue Addr = N->getOperand(AddrOpIdx);
16218
16220
16221 // Search for a use of the address operand that is an increment.
16222 for (SDUse &Use : Addr->uses()) {
16223 SDNode *User = Use.getUser();
16224 if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)
16225 continue;
16226
16227 SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);
16228 unsigned ConstInc =
16229 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16230
16231 if (ConstInc || User->getOpcode() == ISD::ADD)
16232 BaseUpdates.push_back({User, Inc, ConstInc});
16233 }
16234
16235 // If the address is a constant pointer increment itself, find
16236 // another constant increment that has the same base operand
16237 SDValue Base;
16238 SDValue CInc;
16239 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16240 unsigned Offset =
16241 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16242 for (SDUse &Use : Base->uses()) {
16243
16244 SDNode *User = Use.getUser();
16245 if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||
16246 User->getNumOperands() != 2)
16247 continue;
16248
16249 SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);
16250 unsigned UserOffset =
16251 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16252
16253 if (!UserOffset || UserOffset <= Offset)
16254 continue;
16255
16256 unsigned NewConstInc = UserOffset - Offset;
16257 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16258 BaseUpdates.push_back({User, NewInc, NewConstInc});
16259 }
16260 }
16261
16262 // Try to fold the load/store with an update that matches the memory
16263 // access size. This should work well for sequential loads.
16264 //
16265 // Filter out invalid updates as well.
16266 unsigned NumValidUpd = BaseUpdates.size();
16267 for (unsigned I = 0; I < NumValidUpd;) {
16268 BaseUpdateUser &User = BaseUpdates[I];
16269 if (!isValidBaseUpdate(N, User.N)) {
16270 --NumValidUpd;
16271 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16272 continue;
16273 }
16274
16275 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16276 return SDValue();
16277 ++I;
16278 }
16279 BaseUpdates.resize(NumValidUpd);
16280
16281 // Try to fold with other users. Non-constant updates are considered
16282 // first, and constant updates are sorted to not break a sequence of
16283 // strided accesses (if there is any).
16284 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16285 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16286 return LHS.ConstInc < RHS.ConstInc;
16287 });
16288 for (BaseUpdateUser &User : BaseUpdates) {
16289 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16290 return SDValue();
16291 }
16292 return SDValue();
16293}
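// Illustrative sketch (an editorial addition, not part of the original file):
// the kind of sequential access whose vectorized loads/stores plus pointer adds
// the combine above can fold into post-incremented VLD1/VST1 base updates. The
// function name and constant are assumptions for illustration only.
void scale_copy(float *dst, const float *src, int n) {
  for (int i = 0; i < n; ++i)   // vectorized: vld1/vst1 with base writeback
    dst[i] = src[i] * 2.0f;
}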
16294
16295 static SDValue PerformVLDCombine(SDNode *N,
16296 TargetLowering::DAGCombinerInfo &DCI) {
16297 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16298 return SDValue();
16299
16300 return CombineBaseUpdate(N, DCI);
16301}
16302
16303 static SDValue PerformMVEVLDCombine(SDNode *N,
16304 TargetLowering::DAGCombinerInfo &DCI) {
16305 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16306 return SDValue();
16307
16308 SelectionDAG &DAG = DCI.DAG;
16309 SDValue Addr = N->getOperand(2);
16310 MemSDNode *MemN = cast<MemSDNode>(N);
16311 SDLoc dl(N);
16312
16313 // For the stores, where there are multiple intrinsics we only actually want
16314 // to post-inc the last of them.
16315 unsigned IntNo = N->getConstantOperandVal(1);
16316 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16317 return SDValue();
16318 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16319 return SDValue();
16320
16321 // Search for a use of the address operand that is an increment.
16322 for (SDUse &Use : Addr->uses()) {
16323 SDNode *User = Use.getUser();
16324 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
16325 continue;
16326
16327 // Check that the add is independent of the load/store. Otherwise, folding
16328 // it would create a cycle. We can avoid searching through Addr as it's a
16329 // predecessor to both.
16330 SmallPtrSet<const SDNode *, 32> Visited;
16331 SmallVector<const SDNode *, 16> Worklist;
16332 Visited.insert(Addr.getNode());
16333 Worklist.push_back(N);
16334 Worklist.push_back(User);
16335 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16336 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16337 continue;
16338
16339 // Find the new opcode for the updating load/store.
16340 bool isLoadOp = true;
16341 unsigned NewOpc = 0;
16342 unsigned NumVecs = 0;
16343 switch (IntNo) {
16344 default:
16345 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16346 case Intrinsic::arm_mve_vld2q:
16347 NewOpc = ARMISD::VLD2_UPD;
16348 NumVecs = 2;
16349 break;
16350 case Intrinsic::arm_mve_vld4q:
16351 NewOpc = ARMISD::VLD4_UPD;
16352 NumVecs = 4;
16353 break;
16354 case Intrinsic::arm_mve_vst2q:
16355 NewOpc = ARMISD::VST2_UPD;
16356 NumVecs = 2;
16357 isLoadOp = false;
16358 break;
16359 case Intrinsic::arm_mve_vst4q:
16360 NewOpc = ARMISD::VST4_UPD;
16361 NumVecs = 4;
16362 isLoadOp = false;
16363 break;
16364 }
16365
16366 // Find the size of memory referenced by the load/store.
16367 EVT VecTy;
16368 if (isLoadOp) {
16369 VecTy = N->getValueType(0);
16370 } else {
16371 VecTy = N->getOperand(3).getValueType();
16372 }
16373
16374 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16375
16376 // If the increment is a constant, it must match the memory ref size.
16377 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16378 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16379 if (!CInc || CInc->getZExtValue() != NumBytes)
16380 continue;
16381
16382 // Create the new updating load/store node.
16383 // First, create an SDVTList for the new updating node's results.
16384 EVT Tys[6];
16385 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16386 unsigned n;
16387 for (n = 0; n < NumResultVecs; ++n)
16388 Tys[n] = VecTy;
16389 Tys[n++] = MVT::i32;
16390 Tys[n] = MVT::Other;
16391 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16392
16393 // Then, gather the new node's operands.
16394 SmallVector<SDValue, 8> Ops;
16395 Ops.push_back(N->getOperand(0)); // incoming chain
16396 Ops.push_back(N->getOperand(2)); // ptr
16397 Ops.push_back(Inc);
16398
16399 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16400 Ops.push_back(N->getOperand(i));
16401
16402 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16403 MemN->getMemOperand());
16404
16405 // Update the uses.
16406 SmallVector<SDValue, 5> NewResults;
16407 for (unsigned i = 0; i < NumResultVecs; ++i)
16408 NewResults.push_back(SDValue(UpdN.getNode(), i));
16409
16410 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16411 DCI.CombineTo(N, NewResults);
16412 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16413
16414 break;
16415 }
16416
16417 return SDValue();
16418}
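// Illustrative sketch (an editorial addition, not part of the original file):
// a deinterleaving loop of the sort that, once MVE-vectorized into
// arm_mve_vld2q intrinsic calls, gives the combine above an address add it can
// fold into a post-incrementing VLD2_UPD. Names are assumptions for
// illustration only.
void deinterleave(const short *in, short *even, short *odd, int n) {
  for (int i = 0; i < n; ++i) {
    even[i] = in[2 * i];
    odd[i] = in[2 * i + 1];
  }
}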
16419
16420/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16421/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16422/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16423/// return true.
16424 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16425 SelectionDAG &DAG = DCI.DAG;
16426 EVT VT = N->getValueType(0);
16427 // vldN-dup instructions only support 64-bit vectors for N > 1.
16428 if (!VT.is64BitVector())
16429 return false;
16430
16431 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16432 SDNode *VLD = N->getOperand(0).getNode();
16433 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16434 return false;
16435 unsigned NumVecs = 0;
16436 unsigned NewOpc = 0;
16437 unsigned IntNo = VLD->getConstantOperandVal(1);
16438 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16439 NumVecs = 2;
16440 NewOpc = ARMISD::VLD2DUP;
16441 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16442 NumVecs = 3;
16443 NewOpc = ARMISD::VLD3DUP;
16444 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16445 NumVecs = 4;
16446 NewOpc = ARMISD::VLD4DUP;
16447 } else {
16448 return false;
16449 }
16450
16451 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16452 // numbers match the load.
16453 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16454 for (SDUse &Use : VLD->uses()) {
16455 // Ignore uses of the chain result.
16456 if (Use.getResNo() == NumVecs)
16457 continue;
16458 SDNode *User = Use.getUser();
16459 if (User->getOpcode() != ARMISD::VDUPLANE ||
16460 VLDLaneNo != User->getConstantOperandVal(1))
16461 return false;
16462 }
16463
16464 // Create the vldN-dup node.
16465 EVT Tys[5];
16466 unsigned n;
16467 for (n = 0; n < NumVecs; ++n)
16468 Tys[n] = VT;
16469 Tys[n] = MVT::Other;
16470 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16471 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16472 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16473 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16474 Ops, VLDMemInt->getMemoryVT(),
16475 VLDMemInt->getMemOperand());
16476
16477 // Update the uses.
16478 for (SDUse &Use : VLD->uses()) {
16479 unsigned ResNo = Use.getResNo();
16480 // Ignore uses of the chain result.
16481 if (ResNo == NumVecs)
16482 continue;
16483 DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));
16484 }
16485
16486 // Now the vldN-lane intrinsic is dead except for its chain result.
16487 // Update uses of the chain.
16488 std::vector<SDValue> VLDDupResults;
16489 for (unsigned n = 0; n < NumVecs; ++n)
16490 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16491 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16492 DCI.CombineTo(VLD, VLDDupResults);
16493
16494 return true;
16495}
16496
16497/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16498/// ARMISD::VDUPLANE.
16499 static SDValue PerformVDUPLANECombine(SDNode *N,
16500 TargetLowering::DAGCombinerInfo &DCI,
16501 const ARMSubtarget *Subtarget) {
16502 SDValue Op = N->getOperand(0);
16503 EVT VT = N->getValueType(0);
16504
16505 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16506 if (Subtarget->hasMVEIntegerOps()) {
16507 EVT ExtractVT = VT.getVectorElementType();
16508 // We need to ensure we are creating a legal type.
16509 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16510 ExtractVT = MVT::i32;
16511 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16512 N->getOperand(0), N->getOperand(1));
16513 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16514 }
16515
16516 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16517 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16518 if (CombineVLDDUP(N, DCI))
16519 return SDValue(N, 0);
16520
16521 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16522 // redundant. Ignore bit_converts for now; element sizes are checked below.
16523 while (Op.getOpcode() == ISD::BITCAST)
16524 Op = Op.getOperand(0);
16525 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16526 return SDValue();
16527
16528 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16529 unsigned EltSize = Op.getScalarValueSizeInBits();
16530 // The canonical VMOV for a zero vector uses a 32-bit element size.
16531 unsigned Imm = Op.getConstantOperandVal(0);
16532 unsigned EltBits;
16533 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16534 EltSize = 8;
16535 if (EltSize > VT.getScalarSizeInBits())
16536 return SDValue();
16537
16538 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16539}
16540
16541/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16542 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16543 const ARMSubtarget *Subtarget) {
16544 SDValue Op = N->getOperand(0);
16545 SDLoc dl(N);
16546
16547 if (Subtarget->hasMVEIntegerOps()) {
16548 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16549 // need to come from a GPR.
16550 if (Op.getValueType() == MVT::f32)
16551 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16552 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16553 else if (Op.getValueType() == MVT::f16)
16554 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16555 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16556 }
16557
16558 if (!Subtarget->hasNEON())
16559 return SDValue();
16560
16561 // Match VDUP(LOAD) -> VLD1DUP.
16562 // We match this pattern here rather than waiting for isel because the
16563 // transform is only legal for unindexed loads.
16564 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16565 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16566 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16567 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16568 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16569 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16570 SDValue VLDDup =
16571 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16572 LD->getMemoryVT(), LD->getMemOperand());
16573 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16574 return VLDDup;
16575 }
16576
16577 return SDValue();
16578}
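// Illustrative sketch (an editorial addition, not part of the original file):
// broadcasting a loaded scalar appears as VDUP(LOAD) in the DAG and is matched
// above into a single VLD1DUP when the load is unindexed and has one use
// (NEON only). The function name is an assumption for illustration only.
void scale_by_loaded(float *v, const float *scale, int n) {
  float k = *scale;             // single-use, unindexed load
  for (int i = 0; i < n; ++i)   // vectorized: the splat of k can become vld1dup
    v[i] *= k;
}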
16579
16580 static SDValue PerformLOADCombine(SDNode *N,
16581 TargetLowering::DAGCombinerInfo &DCI,
16582 const ARMSubtarget *Subtarget) {
16583 EVT VT = N->getValueType(0);
16584
16585 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16586 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16587 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16588 return CombineBaseUpdate(N, DCI);
16589
16590 return SDValue();
16591}
16592
16593// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16594// pack all of the elements in one place. Next, store to memory in fewer
16595// chunks.
16596 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16597 SelectionDAG &DAG) {
16598 SDValue StVal = St->getValue();
16599 EVT VT = StVal.getValueType();
16600 if (!St->isTruncatingStore() || !VT.isVector())
16601 return SDValue();
16602 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16603 EVT StVT = St->getMemoryVT();
16604 unsigned NumElems = VT.getVectorNumElements();
16605 assert(StVT != VT && "Cannot truncate to the same type");
16606 unsigned FromEltSz = VT.getScalarSizeInBits();
16607 unsigned ToEltSz = StVT.getScalarSizeInBits();
16608
16609 // From, To sizes and ElemCount must be pow of two
16610 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16611 return SDValue();
16612
16613 // We are going to use the original vector elt for storing.
16614 // Accumulated smaller vector elements must be a multiple of the store size.
16615 if (0 != (NumElems * FromEltSz) % ToEltSz)
16616 return SDValue();
16617
16618 unsigned SizeRatio = FromEltSz / ToEltSz;
16619 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16620
16621 // Create a type on which we perform the shuffle.
16622 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16623 NumElems * SizeRatio);
16624 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16625
16626 SDLoc DL(St);
16627 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16628 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16629 for (unsigned i = 0; i < NumElems; ++i)
16630 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16631 : i * SizeRatio;
16632
16633 // Can't shuffle using an illegal type.
16634 if (!TLI.isTypeLegal(WideVecVT))
16635 return SDValue();
16636
16637 SDValue Shuff = DAG.getVectorShuffle(
16638 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16639 // At this point all of the data is stored at the bottom of the
16640 // register. We now need to save it to mem.
16641
16642 // Find the largest store unit
16643 MVT StoreType = MVT::i8;
16644 for (MVT Tp : MVT::integer_valuetypes()) {
16645 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16646 StoreType = Tp;
16647 }
16648 // Didn't find a legal store type.
16649 if (!TLI.isTypeLegal(StoreType))
16650 return SDValue();
16651
16652 // Bitcast the original vector into a vector of store-size units
16653 EVT StoreVecVT =
16654 EVT::getVectorVT(*DAG.getContext(), StoreType,
16655 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16656 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16657 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16658 SmallVector<SDValue, 8> Chains;
16659 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16660 TLI.getPointerTy(DAG.getDataLayout()));
16661 SDValue BasePtr = St->getBasePtr();
16662
16663 // Perform one or more big stores into memory.
16664 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16665 for (unsigned I = 0; I < E; I++) {
16666 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16667 ShuffWide, DAG.getIntPtrConstant(I, DL));
16668 SDValue Ch =
16669 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16670 St->getAlign(), St->getMemOperand()->getFlags());
16671 BasePtr =
16672 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16673 Chains.push_back(Ch);
16674 }
16675 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16676}
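// Illustrative sketch (an editorial addition, not part of the original file):
// a narrowing copy whose vectorized form can become a truncating vector store;
// the combine above packs the narrowed lanes with a shuffle and emits fewer,
// wider stores. Names are assumptions for illustration only.
void narrow_i32_to_i8(const int *src, signed char *dst, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = (signed char)src[i];   // truncating store of the vector lanes
}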
16677
16678// Try taking a single vector store from an fpround (which would otherwise turn
16679// into an expensive buildvector) and splitting it into a series of narrowing
16680// stores.
16681 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16682 SelectionDAG &DAG) {
16683 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16684 return SDValue();
16685 SDValue Trunc = St->getValue();
16686 if (Trunc->getOpcode() != ISD::FP_ROUND)
16687 return SDValue();
16688 EVT FromVT = Trunc->getOperand(0).getValueType();
16689 EVT ToVT = Trunc.getValueType();
16690 if (!ToVT.isVector())
16691 return SDValue();
16693 EVT ToEltVT = ToVT.getVectorElementType();
16694 EVT FromEltVT = FromVT.getVectorElementType();
16695
16696 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16697 return SDValue();
16698
16699 unsigned NumElements = 4;
16700 if (FromVT.getVectorNumElements() % NumElements != 0)
16701 return SDValue();
16702
16703 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16704 // use the VMOVN over splitting the store. We are looking for patterns of:
16705 // !rev: 0 N 1 N+1 2 N+2 ...
16706 // rev: N 0 N+1 1 N+2 2 ...
16707 // The shuffle may either be a single source (in which case N = NumElts/2) or
16708 // two inputs extended with concat to the same size (in which case N =
16709 // NumElts).
16710 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16711 ArrayRef<int> M = SVN->getMask();
16712 unsigned NumElts = ToVT.getVectorNumElements();
16713 if (SVN->getOperand(1).isUndef())
16714 NumElts /= 2;
16715
16716 unsigned Off0 = Rev ? NumElts : 0;
16717 unsigned Off1 = Rev ? 0 : NumElts;
16718
16719 for (unsigned I = 0; I < NumElts; I += 2) {
16720 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16721 return false;
16722 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16723 return false;
16724 }
16725
16726 return true;
16727 };
16728
16729 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16730 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16731 return SDValue();
16732
16733 LLVMContext &C = *DAG.getContext();
16734 SDLoc DL(St);
16735 // Details about the old store
16736 SDValue Ch = St->getChain();
16737 SDValue BasePtr = St->getBasePtr();
16738 Align Alignment = St->getOriginalAlign();
16739 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16740 AAMDNodes AAInfo = St->getAAInfo();
16741
16742 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16743 // and then stored as truncating integer stores.
16744 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16745 EVT NewToVT = EVT::getVectorVT(
16746 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16747
16748 SmallVector<SDValue, 4> Stores;
16749 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16750 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16751 SDValue NewPtr =
16752 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16753
16754 SDValue Extract =
16755 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16756 DAG.getConstant(i * NumElements, DL, MVT::i32));
16757
16758 SDValue FPTrunc =
16759 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16760 Extract, DAG.getConstant(0, DL, MVT::i32));
16761 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16762
16763 SDValue Store = DAG.getTruncStore(
16764 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16765 NewToVT, Alignment, MMOFlags, AAInfo);
16766 Stores.push_back(Store);
16767 }
16768 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16769}
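// Illustrative sketch (an editorial addition, not part of the original file):
// rounding f32 data to f16 on store produces the fpround-of-vector-store shape
// split above into VCVTN plus narrowing truncating stores. Assumes __fp16 is
// available for the target; names are illustrative only.
void round_to_half(const float *src, __fp16 *dst, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = (__fp16)src[i];
}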
16770
16771// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16772// into an expensive buildvector) and splitting it into a series of narrowing
16773// stores.
16774 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16775 SelectionDAG &DAG) {
16776 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16777 return SDValue();
16778 SDValue Trunc = St->getValue();
16779 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16780 return SDValue();
16781 EVT FromVT = Trunc->getOperand(0).getValueType();
16782 EVT ToVT = Trunc.getValueType();
16783
16784 LLVMContext &C = *DAG.getContext();
16785 SDLoc DL(St);
16786 // Details about the old store
16787 SDValue Ch = St->getChain();
16788 SDValue BasePtr = St->getBasePtr();
16789 Align Alignment = St->getOriginalAlign();
16790 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16791 AAMDNodes AAInfo = St->getAAInfo();
16792
16793 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16794 FromVT.getVectorNumElements());
16795
16796 SmallVector<SDValue, 4> Stores;
16797 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16798 unsigned NewOffset =
16799 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16800 SDValue NewPtr =
16801 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16802
16803 SDValue Extract = Trunc.getOperand(i);
16804 SDValue Store = DAG.getTruncStore(
16805 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16806 NewToVT, Alignment, MMOFlags, AAInfo);
16807 Stores.push_back(Store);
16808 }
16809 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16810}
16811
16812// Given a floating point store from an extracted vector, with an integer
16813// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16814// help reduce fp register pressure, doesn't require the fp extract and allows
16815// use of more integer post-inc stores not available with vstr.
16816 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16817 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16818 return SDValue();
16819 SDValue Extract = St->getValue();
16820 EVT VT = Extract.getValueType();
16821 // For now only uses f16. This may be useful for f32 too, but that will
16822 // be bitcast(extract), not the VGETLANEu we currently check here.
16823 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16824 return SDValue();
16825
16826 SDNode *GetLane =
16827 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16828 {Extract.getOperand(0), Extract.getOperand(1)});
16829 if (!GetLane)
16830 return SDValue();
16831
16832 LLVMContext &C = *DAG.getContext();
16833 SDLoc DL(St);
16834 // Create a new integer store to replace the existing floating point version.
16835 SDValue Ch = St->getChain();
16836 SDValue BasePtr = St->getBasePtr();
16837 Align Alignment = St->getOriginalAlign();
16838 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16839 AAMDNodes AAInfo = St->getAAInfo();
16840 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16841 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16842 St->getPointerInfo(), NewToVT, Alignment,
16843 MMOFlags, AAInfo);
16844
16845 return Store;
16846}
16847
16848/// PerformSTORECombine - Target-specific dag combine xforms for
16849/// ISD::STORE.
16850 static SDValue PerformSTORECombine(SDNode *N,
16851 TargetLowering::DAGCombinerInfo &DCI,
16852 const ARMSubtarget *Subtarget) {
16853 StoreSDNode *St = cast<StoreSDNode>(N);
16854 if (St->isVolatile())
16855 return SDValue();
16856 SDValue StVal = St->getValue();
16857 EVT VT = StVal.getValueType();
16858
16859 if (Subtarget->hasNEON())
16860 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16861 return Store;
16862
16863 if (Subtarget->hasMVEFloatOps())
16864 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16865 return NewToken;
16866
16867 if (Subtarget->hasMVEIntegerOps()) {
16868 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16869 return NewChain;
16870 if (SDValue NewToken =
16871 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16872 return NewToken;
16873 }
16874
16875 if (!ISD::isNormalStore(St))
16876 return SDValue();
16877
16878 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16879 // ARM stores of arguments in the same cache line.
16880 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16881 StVal.getNode()->hasOneUse()) {
16882 SelectionDAG &DAG = DCI.DAG;
16883 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16884 SDLoc DL(St);
16885 SDValue BasePtr = St->getBasePtr();
16886 SDValue NewST1 = DAG.getStore(
16887 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16888 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16889 St->getMemOperand()->getFlags());
16890
16891 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16892 DAG.getConstant(4, DL, MVT::i32));
16893 return DAG.getStore(NewST1.getValue(0), DL,
16894 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16895 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16896 St->getOriginalAlign(),
16897 St->getMemOperand()->getFlags());
16898 }
16899
16900 if (StVal.getValueType() == MVT::i64 &&
16901 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16902
16903 // Bitcast an i64 store extracted from a vector to f64.
16904 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16905 SelectionDAG &DAG = DCI.DAG;
16906 SDLoc dl(StVal);
16907 SDValue IntVec = StVal.getOperand(0);
16908 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16909 IntVec.getValueType().getVectorNumElements());
16910 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16911 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16912 Vec, StVal.getOperand(1));
16913 dl = SDLoc(N);
16914 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16915 // Make the DAGCombiner fold the bitcasts.
16916 DCI.AddToWorklist(Vec.getNode());
16917 DCI.AddToWorklist(ExtElt.getNode());
16918 DCI.AddToWorklist(V.getNode());
16919 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16920 St->getPointerInfo(), St->getAlign(),
16921 St->getMemOperand()->getFlags(), St->getAAInfo());
16922 }
16923
16924 // If this is a legal vector store, try to combine it into a VST1_UPD.
16925 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16926 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16927 return CombineBaseUpdate(N, DCI);
16928
16929 return SDValue();
16930}
16931
16932/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16933/// can replace combinations of VMUL and VCVT (floating-point to integer)
16934/// when the VMUL has a constant operand that is a power of 2.
16935///
16936/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16937/// vmul.f32 d16, d17, d16
16938/// vcvt.s32.f32 d16, d16
16939/// becomes:
16940/// vcvt.s32.f32 d16, d16, #3
16941 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16942 const ARMSubtarget *Subtarget) {
16943 if (!Subtarget->hasNEON())
16944 return SDValue();
16945
16946 SDValue Op = N->getOperand(0);
16947 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16948 Op.getOpcode() != ISD::FMUL)
16949 return SDValue();
16950
16951 SDValue ConstVec = Op->getOperand(1);
16952 if (!isa<BuildVectorSDNode>(ConstVec))
16953 return SDValue();
16954
16955 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16956 uint32_t FloatBits = FloatTy.getSizeInBits();
16957 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16958 uint32_t IntBits = IntTy.getSizeInBits();
16959 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16960 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16961 // These instructions only exist converting from f32 to i32. We can handle
16962 // smaller integers by generating an extra truncate, but larger ones would
16963 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16964 // these instructions only support v2i32/v4i32 types.
16965 return SDValue();
16966 }
16967
16968 BitVector UndefElements;
16969 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16970 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16971 if (C == -1 || C == 0 || C > 32)
16972 return SDValue();
16973
16974 SDLoc dl(N);
16975 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16976 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16977 Intrinsic::arm_neon_vcvtfp2fxu;
16978 SDValue FixConv = DAG.getNode(
16979 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16980 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16981 DAG.getConstant(C, dl, MVT::i32));
16982
16983 if (IntBits < FloatBits)
16984 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16985
16986 return FixConv;
16987}
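// Illustrative sketch (an editorial addition, not part of the original file):
// scaling by a power of two before a float-to-int conversion matches the
// VMUL+VCVT pair that the combine above rewrites into the fixed-point form of
// VCVT (here #3, i.e. a multiply by 8). Names are assumptions for illustration.
void to_q3_fixed(const float *in, int *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = (int)(in[i] * 8.0f);   // becomes vcvt.s32.f32 dN, dM, #3
}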
16988
16989 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16990 const ARMSubtarget *Subtarget) {
16991 if (!Subtarget->hasMVEFloatOps())
16992 return SDValue();
16993
16994 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16995 // The second form can be more easily turned into a predicated vadd, and
16996 // possibly combined into a fma to become a predicated vfma.
16997 SDValue Op0 = N->getOperand(0);
16998 SDValue Op1 = N->getOperand(1);
16999 EVT VT = N->getValueType(0);
17000 SDLoc DL(N);
17001
17002 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
17003 // which these VMOV's represent.
17004 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
17005 if (Op.getOpcode() != ISD::BITCAST ||
17006 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
17007 return false;
17008 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
17009 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
17010 return true;
17011 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
17012 return true;
17013 return false;
17014 };
17015
17016 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
17017 std::swap(Op0, Op1);
17018
17019 if (Op1.getOpcode() != ISD::VSELECT)
17020 return SDValue();
17021
17022 SDNodeFlags FaddFlags = N->getFlags();
17023 bool NSZ = FaddFlags.hasNoSignedZeros();
17024 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
17025 return SDValue();
17026
17027 SDValue FAdd =
17028 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
17029 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
17030}
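// Illustrative sketch (an editorial addition, not part of the original file):
// a masked accumulation written with the -0.0 additive identity, i.e. the
// fadd(x, vselect(c, y, -0.0)) shape handled above, which can become a single
// predicated VADD/VFMA under MVE. The function name is illustrative only.
void masked_add(float *acc, const float *b, int n) {
  for (int i = 0; i < n; ++i)
    acc[i] += (b[i] > 0.0f) ? b[i] : -0.0f;
}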
17031
17032 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
17033 SDValue LHS = N->getOperand(0);
17034 SDValue RHS = N->getOperand(1);
17035 EVT VT = N->getValueType(0);
17036 SDLoc DL(N);
17037
17038 if (!N->getFlags().hasAllowReassociation())
17039 return SDValue();
17040
17041 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17042 auto ReassocComplex = [&](SDValue A, SDValue B) {
17043 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17044 return SDValue();
17045 unsigned Opc = A.getConstantOperandVal(0);
17046 if (Opc != Intrinsic::arm_mve_vcmlaq)
17047 return SDValue();
17048 SDValue VCMLA = DAG.getNode(
17049 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17050 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17051 A.getOperand(3), A.getOperand(4));
17052 VCMLA->setFlags(A->getFlags());
17053 return VCMLA;
17054 };
17055 if (SDValue R = ReassocComplex(LHS, RHS))
17056 return R;
17057 if (SDValue R = ReassocComplex(RHS, LHS))
17058 return R;
17059
17060 return SDValue();
17061}
17062
17063 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17064 const ARMSubtarget *Subtarget) {
17065 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17066 return S;
17067 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17068 return S;
17069 return SDValue();
17070}
17071
17072/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17073/// can replace combinations of VCVT (integer to floating-point) and VMUL
17074/// when the VMUL has a constant operand that is a power of 2.
17075///
17076/// Example (assume d17 = <float 0.125, float 0.125>):
17077/// vcvt.f32.s32 d16, d16
17078/// vmul.f32 d16, d16, d17
17079/// becomes:
17080/// vcvt.f32.s32 d16, d16, #3
17081 static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,
17082 const ARMSubtarget *Subtarget) {
17083 if (!Subtarget->hasNEON())
17084 return SDValue();
17085
17086 SDValue Op = N->getOperand(0);
17087 unsigned OpOpcode = Op.getNode()->getOpcode();
17088 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17089 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17090 return SDValue();
17091
17092 SDValue ConstVec = N->getOperand(1);
17093 if (!isa<BuildVectorSDNode>(ConstVec))
17094 return SDValue();
17095
17096 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17097 uint32_t FloatBits = FloatTy.getSizeInBits();
17098 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17099 uint32_t IntBits = IntTy.getSizeInBits();
17100 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17101 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17102 // These instructions only exist converting from i32 to f32. We can handle
17103 // smaller integers by generating an extra extend, but larger ones would
17104 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17105 // these instructions only support v2i32/v4i32 types.
17106 return SDValue();
17107 }
17108
17109 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17110 APFloat Recip(0.0f);
17111 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17112 return SDValue();
17113
17114 bool IsExact;
17115 APSInt IntVal(33);
17116 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17117 APFloat::opOK ||
17118 !IsExact)
17119 return SDValue();
17120
17121 int32_t C = IntVal.exactLogBase2();
17122 if (C == -1 || C == 0 || C > 32)
17123 return SDValue();
17124
17125 SDLoc DL(N);
17126 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17127 SDValue ConvInput = Op.getOperand(0);
17128 if (IntBits < FloatBits)
17129 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17130 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17131
17132 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17133 : Intrinsic::arm_neon_vcvtfxu2fp;
17134 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17135 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17136 DAG.getConstant(C, DL, MVT::i32));
17137}
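// Illustrative sketch (an editorial addition, not part of the original file):
// converting fixed-point data back to float by multiplying with a reciprocal
// power of two is the VCVT+VMUL pair rewritten above into vcvt.f32.s32 with a
// fractional-bits operand. Names are assumptions for illustration only.
void from_q3_fixed(const int *in, float *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = (float)in[i] * 0.125f;   // becomes vcvt.f32.s32 dN, dM, #3
}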
17138
17139 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17140 const ARMSubtarget *ST) {
17141 if (!ST->hasMVEIntegerOps())
17142 return SDValue();
17143
17144 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17145 EVT ResVT = N->getValueType(0);
17146 SDValue N0 = N->getOperand(0);
17147 SDLoc dl(N);
17148
17149 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17150 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17151 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17152 N0.getValueType() == MVT::v16i8)) {
17153 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17154 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17155 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17156 }
17157
17158 // We are looking for something that will have illegal types if left alone,
17159 // but that we can convert to a single instruction under MVE. For example
17160 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17161 // or
17162 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17163
17164 // The legal cases are:
17165 // VADDV u/s 8/16/32
17166 // VMLAV u/s 8/16/32
17167 // VADDLV u/s 32
17168 // VMLALV u/s 16/32
17169
17170 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17171 // extend it and use v4i32 instead.
17172 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17173 EVT AVT = A.getValueType();
17174 return any_of(ExtTypes, [&](MVT Ty) {
17175 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17176 AVT.bitsLE(Ty);
17177 });
17178 };
17179 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17180 EVT AVT = A.getValueType();
17181 if (!AVT.is128BitVector())
17182 A = DAG.getNode(ExtendCode, dl,
17183 AVT.changeVectorElementType(MVT::getIntegerVT(
17184 128 / AVT.getVectorMinNumElements())),
17185 A);
17186 return A;
17187 };
17188 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17189 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17190 return SDValue();
17191 SDValue A = N0->getOperand(0);
17192 if (ExtTypeMatches(A, ExtTypes))
17193 return ExtendIfNeeded(A, ExtendCode);
17194 return SDValue();
17195 };
17196 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17197 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17198 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17199 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17200 return SDValue();
17201 Mask = N0->getOperand(0);
17202 SDValue Ext = N0->getOperand(1);
17203 if (Ext->getOpcode() != ExtendCode)
17204 return SDValue();
17205 SDValue A = Ext->getOperand(0);
17206 if (ExtTypeMatches(A, ExtTypes))
17207 return ExtendIfNeeded(A, ExtendCode);
17208 return SDValue();
17209 };
17210 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17211 SDValue &A, SDValue &B) {
17212 // For a vmla we are trying to match a larger pattern:
17213 // ExtA = sext/zext A
17214 // ExtB = sext/zext B
17215 // Mul = mul ExtA, ExtB
17216 // vecreduce.add Mul
17217 // There might also be an extra extend between the mul and the addreduce, so
17218 // long as the bitwidth is high enough to make them equivalent (for example
17219 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17220 if (ResVT != RetTy)
17221 return false;
17222 SDValue Mul = N0;
17223 if (Mul->getOpcode() == ExtendCode &&
17224 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17225 ResVT.getScalarSizeInBits())
17226 Mul = Mul->getOperand(0);
17227 if (Mul->getOpcode() != ISD::MUL)
17228 return false;
17229 SDValue ExtA = Mul->getOperand(0);
17230 SDValue ExtB = Mul->getOperand(1);
17231 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17232 return false;
17233 A = ExtA->getOperand(0);
17234 B = ExtB->getOperand(0);
17235 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17236 A = ExtendIfNeeded(A, ExtendCode);
17237 B = ExtendIfNeeded(B, ExtendCode);
17238 return true;
17239 }
17240 return false;
17241 };
17242 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17243 SDValue &A, SDValue &B, SDValue &Mask) {
17244 // Same as the pattern above with a select for the zero predicated lanes
17245 // ExtA = sext/zext A
17246 // ExtB = sext/zext B
17247 // Mul = mul ExtA, ExtB
17248 // N0 = select Mask, Mul, 0
17249 // vecreduce.add N0
17250 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17251 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17252 return false;
17253 Mask = N0->getOperand(0);
17254 SDValue Mul = N0->getOperand(1);
17255 if (Mul->getOpcode() == ExtendCode &&
17256 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17257 ResVT.getScalarSizeInBits())
17258 Mul = Mul->getOperand(0);
17259 if (Mul->getOpcode() != ISD::MUL)
17260 return false;
17261 SDValue ExtA = Mul->getOperand(0);
17262 SDValue ExtB = Mul->getOperand(1);
17263 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17264 return false;
17265 A = ExtA->getOperand(0);
17266 B = ExtB->getOperand(0);
17267 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17268 A = ExtendIfNeeded(A, ExtendCode);
17269 B = ExtendIfNeeded(B, ExtendCode);
17270 return true;
17271 }
17272 return false;
17273 };
17274 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17275 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17276 // reductions. The operands are extended with MVEEXT, but as they are
17277 // reductions the lane orders do not matter. MVEEXT may be combined with
17278 // loads to produce two extending loads, or else they will be expanded to
17279 // VREV/VMOVL.
17280 EVT VT = Ops[0].getValueType();
17281 if (VT == MVT::v16i8) {
17282 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17283 "Unexpected illegal long reduction opcode");
17284 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17285
17286 SDValue Ext0 =
17287 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17288 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17289 SDValue Ext1 =
17290 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17291 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17292
17293 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17294 Ext0, Ext1);
17295 SDValue MLA1 =
17296 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17297 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17298 Ext0.getValue(1), Ext1.getValue(1));
17299 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17300 }
17301 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17302 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17303 SDValue(Node.getNode(), 1));
17304 };
17305
17306 SDValue A, B;
17307 SDValue Mask;
17308 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17309 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17310 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17311 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17312 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17313 A, B))
17314 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17315 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17316 A, B))
17317 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17318 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17319 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17320 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17321 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17322 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17323 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17324
17325 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17326 Mask))
17327 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17328 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17329 Mask))
17330 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17331 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17332 Mask))
17333 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17334 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17335 Mask))
17336 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17337 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17338 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17339 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17340 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17341 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17342 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17343
17344 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17345 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17346 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17347 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17348 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17349 return Create64bitNode(ARMISD::VADDLVs, {A});
17350 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17351 return Create64bitNode(ARMISD::VADDLVu, {A});
17352 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17353 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17354 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17355 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17356 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17357 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17358
17359 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17360 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17361 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17362 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17363 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17364 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17365 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17366 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17367 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17368 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17369 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17370 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17371 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17372 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17373
17374 // Some complications. We can get a case where the two inputs of the mul are
17375 // the same, then the output sext will have been helpfully converted to a
17376 // zext. Turn it back.
17377 SDValue Op = N0;
17378 if (Op->getOpcode() == ISD::VSELECT)
17379 Op = Op->getOperand(1);
17380 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17381 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17382 SDValue Mul = Op->getOperand(0);
17383 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17384 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17385 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17386 if (Op != N0)
17387 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17388 N0->getOperand(0), Ext, N0->getOperand(2));
17389 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17390 }
17391 }
17392
17393 return SDValue();
17394}
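// Illustrative sketch (an editorial addition, not part of the original file):
// an i8 dot product whose widened multiply-and-accumulate reduction is the
// vecreduce_add(mul(sext(a), sext(b))) shape matched above and lowered to a
// single VMLADAV/VMLALV under MVE. The function name is illustrative only.
int dot_i8(const signed char *a, const signed char *b, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += a[i] * b[i];   // reduces to vmladav.s8 after vectorization
  return sum;
}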
17395
17396// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17397// the lanes are used. Due to the reduction being commutative the shuffle can be
17398// removed.
17399 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17400 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17401 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17402 if (!Shuf || !Shuf->getOperand(1).isUndef())
17403 return SDValue();
17404
17405 // Check all elements are used once in the mask.
17406 ArrayRef<int> Mask = Shuf->getMask();
17407 APInt SetElts(Mask.size(), 0);
17408 for (int E : Mask) {
17409 if (E < 0 || E >= (int)Mask.size())
17410 return SDValue();
17411 SetElts.setBit(E);
17412 }
17413 if (!SetElts.isAllOnes())
17414 return SDValue();
17415
17416 if (N->getNumOperands() != VecOp + 1) {
17417 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17418 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17419 return SDValue();
17420 }
17421
17422 SmallVector<SDValue> Ops;
17423 for (SDValue Op : N->ops()) {
17424 if (Op.getValueType().isVector())
17425 Ops.push_back(Op.getOperand(0));
17426 else
17427 Ops.push_back(Op);
17428 }
17429 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17430}
17431
17432 static SDValue PerformVMOVNCombine(SDNode *N,
17433 TargetLowering::DAGCombinerInfo &DCI) {
17434 SDValue Op0 = N->getOperand(0);
17435 SDValue Op1 = N->getOperand(1);
17436 unsigned IsTop = N->getConstantOperandVal(2);
17437
17438 // VMOVNT a undef -> a
17439 // VMOVNB a undef -> a
17440 // VMOVNB undef a -> a
17441 if (Op1->isUndef())
17442 return Op0;
17443 if (Op0->isUndef() && !IsTop)
17444 return Op1;
17445
17446 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17447 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17448 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17449 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17450 Op1->getConstantOperandVal(2) == 0)
17451 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17452 Op0, Op1->getOperand(1), N->getOperand(2));
17453
17454 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17455 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17456 // into the top or bottom lanes.
17457 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17458 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17459 APInt Op0DemandedElts =
17460 IsTop ? Op1DemandedElts
17461 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17462
17463 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17464 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17465 return SDValue(N, 0);
17466 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17467 return SDValue(N, 0);
17468
17469 return SDValue();
17470}
17471
17472 static SDValue PerformVQMOVNCombine(SDNode *N,
17473 TargetLowering::DAGCombinerInfo &DCI) {
17474 SDValue Op0 = N->getOperand(0);
17475 unsigned IsTop = N->getConstantOperandVal(2);
17476
17477 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17478 APInt Op0DemandedElts =
17479 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17480 : APInt::getHighBitsSet(2, 1));
17481
17482 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17483 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17484 return SDValue(N, 0);
17485 return SDValue();
17486}
17487
17488 static SDValue PerformVQDMULHCombine(SDNode *N,
17489 TargetLowering::DAGCombinerInfo &DCI) {
17490 EVT VT = N->getValueType(0);
17491 SDValue LHS = N->getOperand(0);
17492 SDValue RHS = N->getOperand(1);
17493
17494 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17495 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17496 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17497 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17498 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17499 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17500 SDLoc DL(N);
17501 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17502 LHS.getOperand(0), RHS.getOperand(0));
17503 SDValue UndefV = LHS.getOperand(1);
17504 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17505 }
17506 return SDValue();
17507}
17508
17509 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17510 SDLoc DL(N);
17511 SDValue Op0 = N->getOperand(0);
17512 SDValue Op1 = N->getOperand(1);
17513
17514 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17515 // uses of the intrinsics.
17516 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17517 int ShiftAmt = C->getSExtValue();
17518 if (ShiftAmt == 0) {
17519 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17520 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17521 return SDValue();
17522 }
17523
17524 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17525 unsigned NewOpcode =
17526 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17527 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17528 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17529 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17530 return NewShift;
17531 }
17532 }
17533
17534 return SDValue();
17535}
17536
17537/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17538 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17539 DAGCombinerInfo &DCI) const {
17540 SelectionDAG &DAG = DCI.DAG;
17541 unsigned IntNo = N->getConstantOperandVal(0);
17542 switch (IntNo) {
17543 default:
17544 // Don't do anything for most intrinsics.
17545 break;
17546
17547 // Vector shifts: check for immediate versions and lower them.
17548 // Note: This is done during DAG combining instead of DAG legalizing because
17549 // the build_vectors for 64-bit vector element shift counts are generally
17550 // not legal, and it is hard to see their values after they get legalized to
17551 // loads from a constant pool.
17552 case Intrinsic::arm_neon_vshifts:
17553 case Intrinsic::arm_neon_vshiftu:
17554 case Intrinsic::arm_neon_vrshifts:
17555 case Intrinsic::arm_neon_vrshiftu:
17556 case Intrinsic::arm_neon_vrshiftn:
17557 case Intrinsic::arm_neon_vqshifts:
17558 case Intrinsic::arm_neon_vqshiftu:
17559 case Intrinsic::arm_neon_vqshiftsu:
17560 case Intrinsic::arm_neon_vqshiftns:
17561 case Intrinsic::arm_neon_vqshiftnu:
17562 case Intrinsic::arm_neon_vqshiftnsu:
17563 case Intrinsic::arm_neon_vqrshiftns:
17564 case Intrinsic::arm_neon_vqrshiftnu:
17565 case Intrinsic::arm_neon_vqrshiftnsu: {
17566 EVT VT = N->getOperand(1).getValueType();
17567 int64_t Cnt;
17568 unsigned VShiftOpc = 0;
17569
17570 switch (IntNo) {
17571 case Intrinsic::arm_neon_vshifts:
17572 case Intrinsic::arm_neon_vshiftu:
17573 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17574 VShiftOpc = ARMISD::VSHLIMM;
17575 break;
17576 }
17577 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17578 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17579 : ARMISD::VSHRuIMM);
17580 break;
17581 }
17582 return SDValue();
17583
17584 case Intrinsic::arm_neon_vrshifts:
17585 case Intrinsic::arm_neon_vrshiftu:
17586 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17587 break;
17588 return SDValue();
17589
17590 case Intrinsic::arm_neon_vqshifts:
17591 case Intrinsic::arm_neon_vqshiftu:
17592 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17593 break;
17594 return SDValue();
17595
17596 case Intrinsic::arm_neon_vqshiftsu:
17597 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17598 break;
17599 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17600
17601 case Intrinsic::arm_neon_vrshiftn:
17602 case Intrinsic::arm_neon_vqshiftns:
17603 case Intrinsic::arm_neon_vqshiftnu:
17604 case Intrinsic::arm_neon_vqshiftnsu:
17605 case Intrinsic::arm_neon_vqrshiftns:
17606 case Intrinsic::arm_neon_vqrshiftnu:
17607 case Intrinsic::arm_neon_vqrshiftnsu:
17608 // Narrowing shifts require an immediate right shift.
17609 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17610 break;
17611 llvm_unreachable("invalid shift count for narrowing vector shift "
17612 "intrinsic");
17613
17614 default:
17615 llvm_unreachable("unhandled vector shift");
17616 }
17617
17618 switch (IntNo) {
17619 case Intrinsic::arm_neon_vshifts:
17620 case Intrinsic::arm_neon_vshiftu:
17621 // Opcode already set above.
17622 break;
17623 case Intrinsic::arm_neon_vrshifts:
17624 VShiftOpc = ARMISD::VRSHRsIMM;
17625 break;
17626 case Intrinsic::arm_neon_vrshiftu:
17627 VShiftOpc = ARMISD::VRSHRuIMM;
17628 break;
17629 case Intrinsic::arm_neon_vrshiftn:
17630 VShiftOpc = ARMISD::VRSHRNIMM;
17631 break;
17632 case Intrinsic::arm_neon_vqshifts:
17633 VShiftOpc = ARMISD::VQSHLsIMM;
17634 break;
17635 case Intrinsic::arm_neon_vqshiftu:
17636 VShiftOpc = ARMISD::VQSHLuIMM;
17637 break;
17638 case Intrinsic::arm_neon_vqshiftsu:
17639 VShiftOpc = ARMISD::VQSHLsuIMM;
17640 break;
17641 case Intrinsic::arm_neon_vqshiftns:
17642 VShiftOpc = ARMISD::VQSHRNsIMM;
17643 break;
17644 case Intrinsic::arm_neon_vqshiftnu:
17645 VShiftOpc = ARMISD::VQSHRNuIMM;
17646 break;
17647 case Intrinsic::arm_neon_vqshiftnsu:
17648 VShiftOpc = ARMISD::VQSHRNsuIMM;
17649 break;
17650 case Intrinsic::arm_neon_vqrshiftns:
17651 VShiftOpc = ARMISD::VQRSHRNsIMM;
17652 break;
17653 case Intrinsic::arm_neon_vqrshiftnu:
17654 VShiftOpc = ARMISD::VQRSHRNuIMM;
17655 break;
17656 case Intrinsic::arm_neon_vqrshiftnsu:
17657 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17658 break;
17659 }
17660
17661 SDLoc dl(N);
17662 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17663 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17664 }
17665
17666 case Intrinsic::arm_neon_vshiftins: {
17667 EVT VT = N->getOperand(1).getValueType();
17668 int64_t Cnt;
17669 unsigned VShiftOpc = 0;
17670
17671 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17672 VShiftOpc = ARMISD::VSLIIMM;
17673 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17674 VShiftOpc = ARMISD::VSRIIMM;
17675 else {
17676 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17677 }
17678
17679 SDLoc dl(N);
17680 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17681 N->getOperand(1), N->getOperand(2),
17682 DAG.getConstant(Cnt, dl, MVT::i32));
17683 }
17684
17685 case Intrinsic::arm_neon_vqrshifts:
17686 case Intrinsic::arm_neon_vqrshiftu:
17687 // No immediate versions of these to check for.
17688 break;
17689
17690 case Intrinsic::arm_neon_vbsl: {
17691 SDLoc dl(N);
17692 return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17693 N->getOperand(2), N->getOperand(3));
17694 }
17695 case Intrinsic::arm_mve_vqdmlah:
17696 case Intrinsic::arm_mve_vqdmlash:
17697 case Intrinsic::arm_mve_vqrdmlah:
17698 case Intrinsic::arm_mve_vqrdmlash:
17699 case Intrinsic::arm_mve_vmla_n_predicated:
17700 case Intrinsic::arm_mve_vmlas_n_predicated:
17701 case Intrinsic::arm_mve_vqdmlah_predicated:
17702 case Intrinsic::arm_mve_vqdmlash_predicated:
17703 case Intrinsic::arm_mve_vqrdmlah_predicated:
17704 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17705 // These intrinsics all take an i32 scalar operand which is narrowed to the
17706 // size of a single lane of the vector type they return. So we don't need
17707 // any bits of that operand above that point, which allows us to eliminate
17708 // uxth/sxth.
17709 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17710 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17711 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17712 return SDValue();
17713 break;
17714 }
17715
17716 case Intrinsic::arm_mve_minv:
17717 case Intrinsic::arm_mve_maxv:
17718 case Intrinsic::arm_mve_minav:
17719 case Intrinsic::arm_mve_maxav:
17720 case Intrinsic::arm_mve_minv_predicated:
17721 case Intrinsic::arm_mve_maxv_predicated:
17722 case Intrinsic::arm_mve_minav_predicated:
17723 case Intrinsic::arm_mve_maxav_predicated: {
17724 // These intrinsics all take an i32 scalar operand which is narrowed to the
17725 // size of a single lane of the vector type they take as the other input.
17726 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17727 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17728 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17729 return SDValue();
17730 break;
17731 }
17732
17733 case Intrinsic::arm_mve_addv: {
17734 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17735 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17736 bool Unsigned = N->getConstantOperandVal(2);
17737 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17738 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17739 }
17740
17741 case Intrinsic::arm_mve_addlv:
17742 case Intrinsic::arm_mve_addlv_predicated: {
17743 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17744 // which recombines the two outputs into an i64
17745 bool Unsigned = N->getConstantOperandVal(2);
17746 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17747 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17748 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17749
17750 SmallVector<SDValue, 4> Ops;
17751 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17752 if (i != 2) // skip the unsigned flag
17753 Ops.push_back(N->getOperand(i));
17754
17755 SDLoc dl(N);
17756 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17757 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17758 val.getValue(1));
17759 }
17760 }
17761
17762 return SDValue();
17763}
17764
17765/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17766/// lowers them. As with the vector shift intrinsics, this is done during DAG
17767/// combining instead of DAG legalizing because the build_vectors for 64-bit
17768/// vector element shift counts are generally not legal, and it is hard to see
17769/// their values after they get legalized to loads from a constant pool.
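/// For example, (shl v8i8:x, (build_vector 3,...,3)) is matched below while the
/// splat shift amount is still visible and becomes (ARMISD::VSHLIMM x, 3).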
17770 static SDValue PerformShiftCombine(SDNode *N,
17771                                    TargetLowering::DAGCombinerInfo &DCI,
17772                                    const ARMSubtarget *ST) {
17773 SelectionDAG &DAG = DCI.DAG;
17774 EVT VT = N->getValueType(0);
17775
17776 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17777 N->getOperand(0)->getOpcode() == ISD::AND &&
17778 N->getOperand(0)->hasOneUse()) {
17779 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17780 return SDValue();
17781 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17782 // usually show up because instcombine prefers to canonicalize it to
17783 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17784 // out of GEP lowering in some cases.
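    // For example, (shl (and x, 0x3fff), 2) has MaskedBits == 18 and
    // ShiftAmt == 2, and is rewritten below as (srl (shl x, 18), 16), which
    // avoids materializing the mask constant.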
17785 SDValue N0 = N->getOperand(0);
17786 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17787 if (!ShiftAmtNode)
17788 return SDValue();
17789 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17790 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17791 if (!AndMaskNode)
17792 return SDValue();
17793 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17794 // Don't transform uxtb/uxth.
17795 if (AndMask == 255 || AndMask == 65535)
17796 return SDValue();
17797 if (isMask_32(AndMask)) {
17798 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17799 if (MaskedBits > ShiftAmt) {
17800 SDLoc DL(N);
17801 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17802 DAG.getConstant(MaskedBits, DL, MVT::i32));
17803 return DAG.getNode(
17804 ISD::SRL, DL, MVT::i32, SHL,
17805 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17806 }
17807 }
17808 }
17809
17810 // Nothing to be done for scalar shifts.
17811 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17812 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17813 return SDValue();
17814 if (ST->hasMVEIntegerOps())
17815 return SDValue();
17816
17817 int64_t Cnt;
17818
17819 switch (N->getOpcode()) {
17820 default: llvm_unreachable("unexpected shift opcode");
17821
17822 case ISD::SHL:
17823 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17824 SDLoc dl(N);
17825 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17826 DAG.getConstant(Cnt, dl, MVT::i32));
17827 }
17828 break;
17829
17830 case ISD::SRA:
17831 case ISD::SRL:
17832 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17833 unsigned VShiftOpc =
17834 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17835 SDLoc dl(N);
17836 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17837 DAG.getConstant(Cnt, dl, MVT::i32));
17838 }
17839 }
17840 return SDValue();
17841}
17842
17843 // Look for a sign/zero/fp extend of a larger than legal load. This can be
17844// split into multiple extending loads, which are simpler to deal with than an
17845// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17846// to convert the type to an f32.
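// For example, (sext (v8i8 load)) to v8i32 becomes two v4i8->v4i32 sextloads
// at byte offsets 0 and 4 whose results are concatenated; f16 vectors are
// loaded as integers and extended to f32 with VCVTL.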
17847 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17848   SDValue N0 = N->getOperand(0);
17849 if (N0.getOpcode() != ISD::LOAD)
17850 return SDValue();
17851 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17852 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17853 LD->getExtensionType() != ISD::NON_EXTLOAD)
17854 return SDValue();
17855 EVT FromVT = LD->getValueType(0);
17856 EVT ToVT = N->getValueType(0);
17857 if (!ToVT.isVector())
17858 return SDValue();
17860 EVT ToEltVT = ToVT.getVectorElementType();
17861 EVT FromEltVT = FromVT.getVectorElementType();
17862
17863 unsigned NumElements = 0;
17864 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17865 NumElements = 4;
17866 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17867 NumElements = 4;
17868 if (NumElements == 0 ||
17869 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17870 FromVT.getVectorNumElements() % NumElements != 0 ||
17871 !isPowerOf2_32(NumElements))
17872 return SDValue();
17873
17874 LLVMContext &C = *DAG.getContext();
17875 SDLoc DL(LD);
17876 // Details about the old load
17877 SDValue Ch = LD->getChain();
17878 SDValue BasePtr = LD->getBasePtr();
17879 Align Alignment = LD->getOriginalAlign();
17880 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17881 AAMDNodes AAInfo = LD->getAAInfo();
17882
17883 ISD::LoadExtType NewExtType =
17884 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17885 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17886 EVT NewFromVT = EVT::getVectorVT(
17887 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17888 EVT NewToVT = EVT::getVectorVT(
17889 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17890
17891   SmallVector<SDValue, 4> Loads;
17892   SmallVector<SDValue, 4> Chains;
17893   for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17894 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17895 SDValue NewPtr =
17896 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17897
17898 SDValue NewLoad =
17899 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17900 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17901 Alignment, MMOFlags, AAInfo);
17902 Loads.push_back(NewLoad);
17903 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17904 }
17905
17906   // Float truncs need to be extended with VCVTBs into their floating point types.
17907 if (FromEltVT == MVT::f16) {
17908     SmallVector<SDValue, 4> Extends;
17909
17910 for (unsigned i = 0; i < Loads.size(); i++) {
17911 SDValue LoadBC =
17912 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17913 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17914 DAG.getConstant(0, DL, MVT::i32));
17915 Extends.push_back(FPExt);
17916 }
17917
17918 Loads = Extends;
17919 }
17920
17921 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17922 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17923 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17924}
17925
17926/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17927/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17928 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17929                                     const ARMSubtarget *ST) {
17930 SDValue N0 = N->getOperand(0);
17931
17932 // Check for sign- and zero-extensions of vector extract operations of 8- and
17933 // 16-bit vector elements. NEON and MVE support these directly. They are
17934 // handled during DAG combining because type legalization will promote them
17935 // to 32-bit types and it is messy to recognize the operations after that.
17936 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17937       N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17938     SDValue Vec = N0.getOperand(0);
17939 SDValue Lane = N0.getOperand(1);
17940 EVT VT = N->getValueType(0);
17941 EVT EltVT = N0.getValueType();
17942 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17943
17944 if (VT == MVT::i32 &&
17945 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17946 TLI.isTypeLegal(Vec.getValueType()) &&
17947 isa<ConstantSDNode>(Lane)) {
17948
17949 unsigned Opc = 0;
17950 switch (N->getOpcode()) {
17951 default: llvm_unreachable("unexpected opcode");
17952 case ISD::SIGN_EXTEND:
17953 Opc = ARMISD::VGETLANEs;
17954 break;
17955 case ISD::ZERO_EXTEND:
17956 case ISD::ANY_EXTEND:
17957 Opc = ARMISD::VGETLANEu;
17958 break;
17959 }
17960 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17961 }
17962 }
17963
17964 if (ST->hasMVEIntegerOps())
17965 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17966 return NewLoad;
17967
17968 return SDValue();
17969}
17970
17971 static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17972                                       const ARMSubtarget *ST) {
17973 if (ST->hasMVEFloatOps())
17974 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17975 return NewLoad;
17976
17977 return SDValue();
17978}
17979
17980// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17981// constant bounds.
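// For example, smin(smax(x, -256), 255) has MinC == ~MaxC and becomes an
// ARMISD::SSAT node, while smin(smax(x, 0), 255) has MaxC == 0 and becomes
// ARMISD::USAT.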
17982 static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17983                                          const ARMSubtarget *Subtarget) {
17984 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17985 !Subtarget->isThumb2())
17986 return SDValue();
17987
17988 EVT VT = Op.getValueType();
17989 SDValue Op0 = Op.getOperand(0);
17990
17991 if (VT != MVT::i32 ||
17992 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17993 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17994 !isa<ConstantSDNode>(Op0.getOperand(1)))
17995 return SDValue();
17996
17997 SDValue Min = Op;
17998 SDValue Max = Op0;
17999 SDValue Input = Op0.getOperand(0);
18000 if (Min.getOpcode() == ISD::SMAX)
18001 std::swap(Min, Max);
18002
18003 APInt MinC = Min.getConstantOperandAPInt(1);
18004 APInt MaxC = Max.getConstantOperandAPInt(1);
18005
18006 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
18007 !(MinC + 1).isPowerOf2())
18008 return SDValue();
18009
18010 SDLoc DL(Op);
18011 if (MinC == ~MaxC)
18012 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
18013 DAG.getConstant(MinC.countr_one(), DL, VT));
18014 if (MaxC == 0)
18015 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
18016 DAG.getConstant(MinC.countr_one(), DL, VT));
18017
18018 return SDValue();
18019}
18020
18021/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
18022/// saturates.
18023 static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
18024                                     const ARMSubtarget *ST) {
18025 EVT VT = N->getValueType(0);
18026 SDValue N0 = N->getOperand(0);
18027
18028 if (VT == MVT::i32)
18029 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
18030
18031 if (!ST->hasMVEIntegerOps())
18032 return SDValue();
18033
18034 if (SDValue V = PerformVQDMULHCombine(N, DAG))
18035 return V;
18036
18037 if (VT != MVT::v4i32 && VT != MVT::v8i16)
18038 return SDValue();
18039
18040 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
18041 // Check one is a smin and the other is a smax
18042 if (Min->getOpcode() != ISD::SMIN)
18043 std::swap(Min, Max);
18044 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18045 return false;
18046
18047 APInt SaturateC;
18048 if (VT == MVT::v4i32)
18049 SaturateC = APInt(32, (1 << 15) - 1, true);
18050 else //if (VT == MVT::v8i16)
18051 SaturateC = APInt(16, (1 << 7) - 1, true);
18052
18053 APInt MinC, MaxC;
18054 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18055 MinC != SaturateC)
18056 return false;
18057 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18058 MaxC != ~SaturateC)
18059 return false;
18060 return true;
18061 };
18062
18063 if (IsSignedSaturate(N, N0.getNode())) {
18064 SDLoc DL(N);
18065 MVT ExtVT, HalfVT;
18066 if (VT == MVT::v4i32) {
18067 HalfVT = MVT::v8i16;
18068 ExtVT = MVT::v4i16;
18069 } else { // if (VT == MVT::v8i16)
18070 HalfVT = MVT::v16i8;
18071 ExtVT = MVT::v8i8;
18072 }
18073
18074     // Create a VQMOVNB with undef top lanes, then sign-extend into the top
18075     // half. That extend will hopefully be removed if only the bottom bits are
18076     // demanded (through a truncating store, for example).
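    // For example, for VT == v4i32 the input is narrowed into the bottom
    // (even) i16 lanes of a v8i16 VQMOVNs, reinterpreted back to v4i32, and
    // then sign-extended in-register from i16.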
18077 SDValue VQMOVN =
18078 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18079 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18080 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18081 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18082 DAG.getValueType(ExtVT));
18083 }
18084
18085 auto IsUnsignedSaturate = [&](SDNode *Min) {
18086 // For unsigned, we just need to check for <= 0xffff
18087 if (Min->getOpcode() != ISD::UMIN)
18088 return false;
18089
18090 APInt SaturateC;
18091 if (VT == MVT::v4i32)
18092 SaturateC = APInt(32, (1 << 16) - 1, true);
18093 else //if (VT == MVT::v8i16)
18094 SaturateC = APInt(16, (1 << 8) - 1, true);
18095
18096 APInt MinC;
18097 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18098 MinC != SaturateC)
18099 return false;
18100 return true;
18101 };
18102
18103 if (IsUnsignedSaturate(N)) {
18104 SDLoc DL(N);
18105 MVT HalfVT;
18106 unsigned ExtConst;
18107 if (VT == MVT::v4i32) {
18108 HalfVT = MVT::v8i16;
18109 ExtConst = 0x0000FFFF;
18110 } else { //if (VT == MVT::v8i16)
18111 HalfVT = MVT::v16i8;
18112 ExtConst = 0x00FF;
18113 }
18114
18115 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18116 // an AND. That extend will hopefully be removed if only the bottom bits are
18117     // demanded (through a truncating store, for example).
18118 SDValue VQMOVN =
18119 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18120 DAG.getConstant(0, DL, MVT::i32));
18121 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18122 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18123 DAG.getConstant(ExtConst, DL, VT));
18124 }
18125
18126 return SDValue();
18127}
18128
18129 static const APInt *isPowerOf2Constant(SDValue V) {
18130   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18131 if (!C)
18132 return nullptr;
18133 const APInt *CV = &C->getAPIntValue();
18134 return CV->isPowerOf2() ? CV : nullptr;
18135}
18136
18137 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18138   // If we have a CMOV, OR and AND combination such as:
18139 // if (x & CN)
18140 // y |= CM;
18141 //
18142 // And:
18143 // * CN is a single bit;
18144 // * All bits covered by CM are known zero in y
18145 //
18146 // Then we can convert this into a sequence of BFI instructions. This will
18147 // always be a win if CM is a single bit, will always be no worse than the
18148 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18149 // three bits (due to the extra IT instruction).
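  // For example, with CN == 0x4 and CM == 0x30, X is shifted right by 2 and
  // its low bit is then BFI'd into bits 4 and 5 of Y, one BFI per set bit of
  // CM.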
18150
18151 SDValue Op0 = CMOV->getOperand(0);
18152 SDValue Op1 = CMOV->getOperand(1);
18153 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18154 SDValue CmpZ = CMOV->getOperand(3);
18155
18156 // The compare must be against zero.
18157 if (!isNullConstant(CmpZ->getOperand(1)))
18158 return SDValue();
18159
18160 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18161 SDValue And = CmpZ->getOperand(0);
18162 if (And->getOpcode() != ISD::AND)
18163 return SDValue();
18164 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18165 if (!AndC)
18166 return SDValue();
18167 SDValue X = And->getOperand(0);
18168
18169 if (CC == ARMCC::EQ) {
18170 // We're performing an "equal to zero" compare. Swap the operands so we
18171 // canonicalize on a "not equal to zero" compare.
18172 std::swap(Op0, Op1);
18173 } else {
18174 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18175 }
18176
18177 if (Op1->getOpcode() != ISD::OR)
18178 return SDValue();
18179
18180 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18181 if (!OrC)
18182 return SDValue();
18183 SDValue Y = Op1->getOperand(0);
18184
18185 if (Op0 != Y)
18186 return SDValue();
18187
18188 // Now, is it profitable to continue?
18189 APInt OrCI = OrC->getAPIntValue();
18190 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18191 if (OrCI.popcount() > Heuristic)
18192 return SDValue();
18193
18194 // Lastly, can we determine that the bits defined by OrCI
18195 // are zero in Y?
18196 KnownBits Known = DAG.computeKnownBits(Y);
18197 if ((OrCI & Known.Zero) != OrCI)
18198 return SDValue();
18199
18200 // OK, we can do the combine.
18201 SDValue V = Y;
18202 SDLoc dl(X);
18203 EVT VT = X.getValueType();
18204 unsigned BitInX = AndC->logBase2();
18205
18206 if (BitInX != 0) {
18207 // We must shift X first.
18208 X = DAG.getNode(ISD::SRL, dl, VT, X,
18209 DAG.getConstant(BitInX, dl, VT));
18210 }
18211
18212 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18213 BitInY < NumActiveBits; ++BitInY) {
18214 if (OrCI[BitInY] == 0)
18215 continue;
18216 APInt Mask(VT.getSizeInBits(), 0);
18217 Mask.setBit(BitInY);
18218 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18219 // Confusingly, the operand is an *inverted* mask.
18220 DAG.getConstant(~Mask, dl, VT));
18221 }
18222
18223 return V;
18224}
18225
18226// Given N, the value controlling the conditional branch, search for the loop
18227// intrinsic, returning it, along with how the value is used. We need to handle
18228// patterns such as the following:
18229// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18230// (brcond (setcc (loop.decrement), 0, eq), exit)
18231// (brcond (setcc (loop.decrement), 0, ne), header)
18232 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18233                                    bool &Negate) {
18234 switch (N->getOpcode()) {
18235 default:
18236 break;
18237 case ISD::XOR: {
18238 if (!isa<ConstantSDNode>(N.getOperand(1)))
18239 return SDValue();
18240 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18241 return SDValue();
18242 Negate = !Negate;
18243 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18244 }
18245 case ISD::SETCC: {
18246 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18247 if (!Const)
18248 return SDValue();
18249 if (Const->isZero())
18250 Imm = 0;
18251 else if (Const->isOne())
18252 Imm = 1;
18253 else
18254 return SDValue();
18255 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18256 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18257 }
18258   case ISD::INTRINSIC_W_CHAIN: {
18259     unsigned IntOp = N.getConstantOperandVal(1);
18260 if (IntOp != Intrinsic::test_start_loop_iterations &&
18261 IntOp != Intrinsic::loop_decrement_reg)
18262 return SDValue();
18263 return N;
18264 }
18265 }
18266 return SDValue();
18267}
18268
18269 static SDValue PerformHWLoopCombine(SDNode *N,
18270                                     TargetLowering::DAGCombinerInfo &DCI,
18271                                     const ARMSubtarget *ST) {
18272
18273 // The hwloop intrinsics that we're interested are used for control-flow,
18274 // either for entering or exiting the loop:
18275 // - test.start.loop.iterations will test whether its operand is zero. If it
18276 // is zero, the proceeding branch should not enter the loop.
18277 // - loop.decrement.reg also tests whether its operand is zero. If it is
18278 // zero, the proceeding branch should not branch back to the beginning of
18279 // the loop.
18280   // So here, we need to check how the brcond is using the result of each
18281 // of the intrinsics to ensure that we're branching to the right place at the
18282 // right time.
18283
18284   ISD::CondCode CC;
18285   SDValue Cond;
18286 int Imm = 1;
18287 bool Negate = false;
18288 SDValue Chain = N->getOperand(0);
18289 SDValue Dest;
18290
18291 if (N->getOpcode() == ISD::BRCOND) {
18292 CC = ISD::SETEQ;
18293 Cond = N->getOperand(1);
18294 Dest = N->getOperand(2);
18295 } else {
18296 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18297 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18298 Cond = N->getOperand(2);
18299 Dest = N->getOperand(4);
18300 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18301 if (!Const->isOne() && !Const->isZero())
18302 return SDValue();
18303 Imm = Const->getZExtValue();
18304 } else
18305 return SDValue();
18306 }
18307
18308 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18309 if (!Int)
18310 return SDValue();
18311
18312 if (Negate)
18313 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18314
18315 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18316 return (CC == ISD::SETEQ && Imm == 0) ||
18317 (CC == ISD::SETNE && Imm == 1) ||
18318 (CC == ISD::SETLT && Imm == 1) ||
18319 (CC == ISD::SETULT && Imm == 1);
18320 };
18321
18322 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18323 return (CC == ISD::SETEQ && Imm == 1) ||
18324 (CC == ISD::SETNE && Imm == 0) ||
18325 (CC == ISD::SETGT && Imm == 0) ||
18326 (CC == ISD::SETUGT && Imm == 0) ||
18327 (CC == ISD::SETGE && Imm == 1) ||
18328 (CC == ISD::SETUGE && Imm == 1);
18329 };
18330
18331 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18332 "unsupported condition");
18333
18334 SDLoc dl(Int);
18335 SelectionDAG &DAG = DCI.DAG;
18336 SDValue Elements = Int.getOperand(2);
18337 unsigned IntOp = Int->getConstantOperandVal(1);
18338 assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
18339 "expected single br user");
18340 SDNode *Br = *N->user_begin();
18341 SDValue OtherTarget = Br->getOperand(1);
18342
18343 // Update the unconditional branch to branch to the given Dest.
18344 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18345 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18346 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18347 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18348 };
18349
18350 if (IntOp == Intrinsic::test_start_loop_iterations) {
18351 SDValue Res;
18352 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18353 // We expect this 'instruction' to branch when the counter is zero.
18354 if (IsTrueIfZero(CC, Imm)) {
18355 SDValue Ops[] = {Chain, Setup, Dest};
18356 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18357 } else {
18358 // The logic is the reverse of what we need for WLS, so find the other
18359 // basic block target: the target of the proceeding br.
18360 UpdateUncondBr(Br, Dest, DAG);
18361
18362 SDValue Ops[] = {Chain, Setup, OtherTarget};
18363 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18364 }
18365 // Update LR count to the new value
18366 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18367 // Update chain
18368 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18369 return Res;
18370 } else {
18371 SDValue Size =
18372 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18373 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18374 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18375 DAG.getVTList(MVT::i32, MVT::Other), Args);
18376 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18377
18378 // We expect this instruction to branch when the count is not zero.
18379 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18380
18381 // Update the unconditional branch to target the loop preheader if we've
18382 // found the condition has been reversed.
18383 if (Target == OtherTarget)
18384 UpdateUncondBr(Br, Dest, DAG);
18385
18386 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18387 SDValue(LoopDec.getNode(), 1), Chain);
18388
18389 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18390 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18391 }
18392 return SDValue();
18393}
18394
18395/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18396SDValue
18398 SDValue Cmp = N->getOperand(3);
18399 if (Cmp.getOpcode() != ARMISD::CMPZ)
18400 // Only looking at NE cases.
18401 return SDValue();
18402
18403 SDLoc dl(N);
18404 SDValue LHS = Cmp.getOperand(0);
18405 SDValue RHS = Cmp.getOperand(1);
18406 SDValue Chain = N->getOperand(0);
18407 SDValue BB = N->getOperand(1);
18408 SDValue ARMcc = N->getOperand(2);
18410
18411 // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
18412 // -> (brcond Chain BB CC Flags)
18413 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18414 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18415 LHS->getOperand(0)->hasOneUse() &&
18416 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18417 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18418 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18419 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18420 LHS->getOperand(0)->getOperand(2),
18421 LHS->getOperand(0)->getOperand(3));
18422 }
18423
18424 return SDValue();
18425}
18426
18427/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18428SDValue
18430 SDValue Cmp = N->getOperand(3);
18431 if (Cmp.getOpcode() != ARMISD::CMPZ)
18432 // Only looking at EQ and NE cases.
18433 return SDValue();
18434
18435 EVT VT = N->getValueType(0);
18436 SDLoc dl(N);
18437 SDValue LHS = Cmp.getOperand(0);
18438 SDValue RHS = Cmp.getOperand(1);
18439 SDValue FalseVal = N->getOperand(0);
18440 SDValue TrueVal = N->getOperand(1);
18441 SDValue ARMcc = N->getOperand(2);
18443
18444 // BFI is only available on V6T2+.
18445 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18446     SDValue R = PerformCMOVToBFICombine(N, DAG);
18447     if (R)
18448 return R;
18449 }
18450
18451 // Simplify
18452 // mov r1, r0
18453 // cmp r1, x
18454 // mov r0, y
18455 // moveq r0, x
18456 // to
18457 // cmp r0, x
18458 // movne r0, y
18459 //
18460 // mov r1, r0
18461 // cmp r1, x
18462 // mov r0, x
18463 // movne r0, y
18464 // to
18465 // cmp r0, x
18466 // movne r0, y
18467 /// FIXME: Turn this into a target neutral optimization?
18468 SDValue Res;
18469 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18470 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18471 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18472 SDValue ARMcc;
18473 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18474 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18475 }
18476
18477 // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
18478 // -> (cmov F T CC Flags)
18479 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18480 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18481       isNullConstant(RHS)) {
18482     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18483 LHS->getOperand(2), LHS->getOperand(3));
18484 }
18485
18486 if (!VT.isInteger())
18487 return SDValue();
18488
18489   // Fold away an unnecessary CMPZ/CMOV
18490 // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18491 // if C1==EQ -> CMOV A, B, C2, D
18492 // if C1==NE -> CMOV A, B, NOT(C2), D
18493 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18494 N->getConstantOperandVal(2) == ARMCC::NE) {
18495     ARMCC::CondCodes Cond;
18496     if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
18497 if (N->getConstantOperandVal(2) == ARMCC::NE)
18498         Cond = ARMCC::getOppositeCondition(Cond);
18499       return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18500 N->getOperand(1),
18501 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
18502 }
18503 }
18504
18505 // Materialize a boolean comparison for integers so we can avoid branching.
18506 if (isNullConstant(FalseVal)) {
18507 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18508 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18509 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18510 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18511 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18512 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18513 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18514 DAG.getConstant(5, dl, MVT::i32));
18515 } else {
18516 // CMOV 0, 1, ==, (CMPZ x, y) ->
18517 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18518 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18519 //
18520 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18521 // x != y. In other words, a carry C == 1 when x == y, C == 0
18522 // otherwise.
18523 // The final UADDO_CARRY computes
18524 // x - y + (0 - (x - y)) + C == C
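        // For example, x == y == 7: Sub == 0, 0 - 0 does not borrow, so
        // Carry == 1 and the result is 0 + 0 + 1 == 1. With x == 7, y == 5:
        // Sub == 2, 0 - 2 borrows, Carry == 0 and the result wraps to
        // 2 + (-2) + 0 == 0.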
18525 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18526 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18527 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18528 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18529 // actually.
18530 SDValue Carry =
18531 DAG.getNode(ISD::SUB, dl, MVT::i32,
18532 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18533 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18534 }
18535 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18536 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18537 // This seems pointless but will allow us to combine it further below.
18538 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18539 SDValue Sub =
18540 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18541 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18542 Sub.getValue(1));
18543 FalseVal = Sub;
18544 }
18545 } else if (isNullConstant(TrueVal)) {
18546 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18547 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18548 // This seems pointless but will allow us to combine it further below
18549 // Note that we change == for != as this is the dual for the case above.
18550 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18551 SDValue Sub =
18552 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18553 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18554 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18555 Sub.getValue(1));
18556 FalseVal = Sub;
18557 }
18558 }
18559
18560 // On Thumb1, the DAG above may be further combined if z is a power of 2
18561 // (z == 2 ^ K).
18562 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18563 // t1 = (USUBO (SUB x, y), 1)
18564 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18565 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18566 //
18567 // This also handles the special case of comparing against zero; it's
18568 // essentially, the same pattern, except there's no SUBC:
18569 // CMOV x, z, !=, (CMPZ x, 0) ->
18570 // t1 = (USUBO x, 1)
18571 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18572 // Result = if K != 0 then (SHL t2:0, K) else t2:0
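  // For example, with z == 4 (K == 2): t1 borrows only when x == y, t2 is
  // then 1 - borrow, and the final shift left by 2 yields 4 when x != y and
  // 0 when x == y.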
18573 const APInt *TrueConst;
18574 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18575 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18576 FalseVal.getOperand(1) == RHS) ||
18577 (FalseVal == LHS && isNullConstant(RHS))) &&
18578 (TrueConst = isPowerOf2Constant(TrueVal))) {
18579 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18580 unsigned ShiftAmount = TrueConst->logBase2();
18581 if (ShiftAmount)
18582 TrueVal = DAG.getConstant(1, dl, VT);
18583 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18584 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18585 Subc.getValue(1));
18586
18587 if (ShiftAmount)
18588 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18589 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18590 }
18591
18592 if (Res.getNode()) {
18593 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18594 // Capture demanded bits information that would be otherwise lost.
18595 if (Known.Zero == 0xfffffffe)
18596 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18597 DAG.getValueType(MVT::i1));
18598 else if (Known.Zero == 0xffffff00)
18599 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18600 DAG.getValueType(MVT::i8));
18601 else if (Known.Zero == 0xffff0000)
18602 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18603 DAG.getValueType(MVT::i16));
18604 }
18605
18606 return Res;
18607}
18608
18609 static SDValue PerformBITCASTCombine(SDNode *N,
18610                                      TargetLowering::DAGCombinerInfo &DCI,
18611                                      const ARMSubtarget *ST) {
18612 SelectionDAG &DAG = DCI.DAG;
18613 SDValue Src = N->getOperand(0);
18614 EVT DstVT = N->getValueType(0);
18615
18616 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18617 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18618 EVT SrcVT = Src.getValueType();
18619 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18620 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18621 }
18622
18623 // We may have a bitcast of something that has already had this bitcast
18624 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18625 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18626 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18627 Src.getValueType().getScalarSizeInBits())
18628 Src = Src.getOperand(0);
18629
18630 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18631 // would be generated is at least the width of the element type.
18632 EVT SrcVT = Src.getValueType();
18633 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18634 Src.getOpcode() == ARMISD::VMVNIMM ||
18635 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18636 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18637 DAG.getDataLayout().isBigEndian())
18638 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18639
18640 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18641 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18642 return R;
18643
18644 return SDValue();
18645}
18646
18647// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18648// node into stack operations after legalizeOps.
18649 static SDValue PerformMVETruncCombine(SDNode *N,
18650                                       TargetLowering::DAGCombinerInfo &DCI) {
18651   SelectionDAG &DAG = DCI.DAG;
18652 EVT VT = N->getValueType(0);
18653 SDLoc DL(N);
18654
18655 // MVETrunc(Undef, Undef) -> Undef
18656 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18657 return DAG.getUNDEF(VT);
18658
18659 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18660 if (N->getNumOperands() == 2 &&
18661 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18662 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18663 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18664 N->getOperand(0).getOperand(1),
18665 N->getOperand(1).getOperand(0),
18666 N->getOperand(1).getOperand(1));
18667
18668 // MVETrunc(shuffle, shuffle) -> VMOVN
18669 if (N->getNumOperands() == 2 &&
18670 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18671 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18672 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18673 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18674
18675 if (S0->getOperand(0) == S1->getOperand(0) &&
18676 S0->getOperand(1) == S1->getOperand(1)) {
18677 // Construct complete shuffle mask
18678 SmallVector<int, 8> Mask(S0->getMask());
18679 Mask.append(S1->getMask().begin(), S1->getMask().end());
18680
18681 if (isVMOVNTruncMask(Mask, VT, false))
18682 return DAG.getNode(
18683 ARMISD::VMOVN, DL, VT,
18684 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18685 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18686 DAG.getConstant(1, DL, MVT::i32));
18687 if (isVMOVNTruncMask(Mask, VT, true))
18688 return DAG.getNode(
18689 ARMISD::VMOVN, DL, VT,
18690 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18691 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18692 DAG.getConstant(1, DL, MVT::i32));
18693 }
18694 }
18695
18696 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18697 // truncate to a buildvector to allow the generic optimisations to kick in.
18698 if (all_of(N->ops(), [](SDValue Op) {
18699 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18700 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18701 (Op.getOpcode() == ISD::BITCAST &&
18702 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18703 })) {
18704 SmallVector<SDValue, 8> Extracts;
18705 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18706 SDValue O = N->getOperand(Op);
18707 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18708 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18709 DAG.getConstant(i, DL, MVT::i32));
18710 Extracts.push_back(Ext);
18711 }
18712 }
18713 return DAG.getBuildVector(VT, DL, Extracts);
18714 }
18715
18716 // If we are late in the legalization process and nothing has optimised
18717 // the trunc to anything better, lower it to a stack store and reload,
18718 // performing the truncation whilst keeping the lanes in the correct order:
18719 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18720 if (!DCI.isAfterLegalizeDAG())
18721 return SDValue();
18722
18723 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18724 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18725 int NumIns = N->getNumOperands();
18726 assert((NumIns == 2 || NumIns == 4) &&
18727 "Expected 2 or 4 inputs to an MVETrunc");
18728 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18729 if (N->getNumOperands() == 4)
18730 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18731
18732 SmallVector<SDValue> Chains;
18733 for (int I = 0; I < NumIns; I++) {
18734 SDValue Ptr = DAG.getNode(
18735 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18736 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18737     MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18738         DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18739 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18740 Ptr, MPI, StoreVT, Align(4));
18741 Chains.push_back(Ch);
18742 }
18743
18744 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18745 MachinePointerInfo MPI =
18746       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18747   return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18748}
18749
18750// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18751 static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18752                                                     SelectionDAG &DAG) {
18753 SDValue N0 = N->getOperand(0);
18754 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18755 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18756 return SDValue();
18757
18758 EVT FromVT = LD->getMemoryVT();
18759 EVT ToVT = N->getValueType(0);
18760 if (!ToVT.isVector())
18761 return SDValue();
18762 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18763 EVT ToEltVT = ToVT.getVectorElementType();
18764 EVT FromEltVT = FromVT.getVectorElementType();
18765
18766 unsigned NumElements = 0;
18767 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18768 NumElements = 4;
18769 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18770 NumElements = 8;
18771 assert(NumElements != 0);
18772
18773 ISD::LoadExtType NewExtType =
18774 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18775 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18776 LD->getExtensionType() != ISD::EXTLOAD &&
18777 LD->getExtensionType() != NewExtType)
18778 return SDValue();
18779
18780 LLVMContext &C = *DAG.getContext();
18781 SDLoc DL(LD);
18782 // Details about the old load
18783 SDValue Ch = LD->getChain();
18784 SDValue BasePtr = LD->getBasePtr();
18785 Align Alignment = LD->getOriginalAlign();
18786 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18787 AAMDNodes AAInfo = LD->getAAInfo();
18788
18789 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18790 EVT NewFromVT = EVT::getVectorVT(
18791 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18792 EVT NewToVT = EVT::getVectorVT(
18793 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18794
18795   SmallVector<SDValue, 4> Loads;
18796   SmallVector<SDValue, 4> Chains;
18797   for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18798 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18799 SDValue NewPtr =
18800 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18801
18802 SDValue NewLoad =
18803 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18804 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18805 Alignment, MMOFlags, AAInfo);
18806 Loads.push_back(NewLoad);
18807 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18808 }
18809
18810 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18811 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18812 return DAG.getMergeValues(Loads, DL);
18813}
18814
18815 // Perform combines for MVEEXT. If it has not been optimized to anything better
18816// before lowering, it gets converted to stack store and extloads performing the
18817// extend whilst still keeping the same lane ordering.
18818 static SDValue PerformMVEExtCombine(SDNode *N,
18819                                     TargetLowering::DAGCombinerInfo &DCI) {
18820   SelectionDAG &DAG = DCI.DAG;
18821 EVT VT = N->getValueType(0);
18822 SDLoc DL(N);
18823 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18824 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18825
18826 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18827 *DAG.getContext());
18828 auto Extend = [&](SDValue V) {
18829 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18830 return N->getOpcode() == ARMISD::MVESEXT
18831 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18832 DAG.getValueType(ExtVT))
18833 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18834 };
18835
18836 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18837 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18838 SDValue Ext = Extend(N->getOperand(0));
18839 return DAG.getMergeValues({Ext, Ext}, DL);
18840 }
18841
18842 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18843 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18844 ArrayRef<int> Mask = SVN->getMask();
18845 assert(Mask.size() == 2 * VT.getVectorNumElements());
18846 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18847 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18848 SDValue Op0 = SVN->getOperand(0);
18849 SDValue Op1 = SVN->getOperand(1);
18850
18851 auto CheckInregMask = [&](int Start, int Offset) {
18852 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18853 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18854 return false;
18855 return true;
18856 };
18857 SDValue V0 = SDValue(N, 0);
18858 SDValue V1 = SDValue(N, 1);
18859 if (CheckInregMask(0, 0))
18860 V0 = Extend(Op0);
18861 else if (CheckInregMask(0, 1))
18862 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18863 else if (CheckInregMask(0, Mask.size()))
18864 V0 = Extend(Op1);
18865 else if (CheckInregMask(0, Mask.size() + 1))
18866 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18867
18868 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18869 V1 = Extend(Op1);
18870 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18871 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18872 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18873 V1 = Extend(Op0);
18874 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18875 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18876
18877 if (V0.getNode() != N || V1.getNode() != N)
18878 return DAG.getMergeValues({V0, V1}, DL);
18879 }
18880
18881 // MVEEXT(load) -> extload, extload
18882 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18883     if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18884       return L;
18885
18886 if (!DCI.isAfterLegalizeDAG())
18887 return SDValue();
18888
18889 // Lower to a stack store and reload:
18890 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18891 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18892 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18893 int NumOuts = N->getNumValues();
18894 assert((NumOuts == 2 || NumOuts == 4) &&
18895 "Expected 2 or 4 outputs to an MVEEXT");
18896 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18897 *DAG.getContext());
18898 if (N->getNumOperands() == 4)
18899 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18900
18901 MachinePointerInfo MPI =
18902       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18903   SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18904 StackPtr, MPI, Align(4));
18905
18906   SmallVector<SDValue> Loads;
18907   for (int I = 0; I < NumOuts; I++) {
18908 SDValue Ptr = DAG.getNode(
18909 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18910 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18911     MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18912         DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18913 SDValue Load = DAG.getExtLoad(
18914 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18915 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18916 Loads.push_back(Load);
18917 }
18918
18919 return DAG.getMergeValues(Loads, DL);
18920}
18921
18922 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18923                                              DAGCombinerInfo &DCI) const {
18924 switch (N->getOpcode()) {
18925 default: break;
18926 case ISD::SELECT_CC:
18927 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18928 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18929 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18930 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18931 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18932 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18933 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18934 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18935 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18936 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18937 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18938 case ISD::BRCOND:
18939 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18940 case ARMISD::ADDC:
18941 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18942 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18943 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18944 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18945 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18946 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18947 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18948 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18949 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18950   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18951   case ISD::EXTRACT_VECTOR_ELT:
18952     return PerformExtractEltCombine(N, DCI, Subtarget);
18956 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18957 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18958 case ISD::FP_TO_SINT:
18959 case ISD::FP_TO_UINT:
18960 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18961 case ISD::FADD:
18962 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18963 case ISD::FMUL:
18964 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18965   case ISD::INTRINSIC_WO_CHAIN:
18966     return PerformIntrinsicCombine(N, DCI);
18967 case ISD::SHL:
18968 case ISD::SRA:
18969 case ISD::SRL:
18970 return PerformShiftCombine(N, DCI, Subtarget);
18971 case ISD::SIGN_EXTEND:
18972 case ISD::ZERO_EXTEND:
18973 case ISD::ANY_EXTEND:
18974 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18975 case ISD::FP_EXTEND:
18976 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18977 case ISD::SMIN:
18978 case ISD::UMIN:
18979 case ISD::SMAX:
18980 case ISD::UMAX:
18981 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18982 case ARMISD::CMOV:
18983 return PerformCMOVCombine(N, DCI.DAG);
18984 case ARMISD::BRCOND:
18985 return PerformBRCONDCombine(N, DCI.DAG);
18986 case ARMISD::CMPZ:
18987 return PerformCMPZCombine(N, DCI.DAG);
18988 case ARMISD::CSINC:
18989 case ARMISD::CSINV:
18990 case ARMISD::CSNEG:
18991 return PerformCSETCombine(N, DCI.DAG);
18992 case ISD::LOAD:
18993 return PerformLOADCombine(N, DCI, Subtarget);
18994 case ARMISD::VLD1DUP:
18995 case ARMISD::VLD2DUP:
18996 case ARMISD::VLD3DUP:
18997 case ARMISD::VLD4DUP:
18998 return PerformVLDCombine(N, DCI);
18999   case ARMISD::BUILD_VECTOR:
19000     return PerformARMBUILD_VECTORCombine(N, DCI);
19001 case ISD::BITCAST:
19002 return PerformBITCASTCombine(N, DCI, Subtarget);
19003   case ARMISD::PREDICATE_CAST:
19004     return PerformPREDICATE_CASTCombine(N, DCI);
19005   case ARMISD::VECTOR_REG_CAST:
19006     return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
19007 case ARMISD::MVETRUNC:
19008 return PerformMVETruncCombine(N, DCI);
19009 case ARMISD::MVESEXT:
19010 case ARMISD::MVEZEXT:
19011 return PerformMVEExtCombine(N, DCI);
19012 case ARMISD::VCMP:
19013 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
19014 case ISD::VECREDUCE_ADD:
19015 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
19016 case ARMISD::VADDVs:
19017 case ARMISD::VADDVu:
19018 case ARMISD::VADDLVs:
19019 case ARMISD::VADDLVu:
19020 case ARMISD::VADDLVAs:
19021 case ARMISD::VADDLVAu:
19022 case ARMISD::VMLAVs:
19023 case ARMISD::VMLAVu:
19024 case ARMISD::VMLALVs:
19025 case ARMISD::VMLALVu:
19026 case ARMISD::VMLALVAs:
19027 case ARMISD::VMLALVAu:
19028 return PerformReduceShuffleCombine(N, DCI.DAG);
19029 case ARMISD::VMOVN:
19030 return PerformVMOVNCombine(N, DCI);
19031 case ARMISD::VQMOVNs:
19032 case ARMISD::VQMOVNu:
19033 return PerformVQMOVNCombine(N, DCI);
19034 case ARMISD::VQDMULH:
19035 return PerformVQDMULHCombine(N, DCI);
19036 case ARMISD::ASRL:
19037 case ARMISD::LSRL:
19038 case ARMISD::LSLL:
19039 return PerformLongShiftCombine(N, DCI.DAG);
19040 case ARMISD::SMULWB: {
19041 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19042 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19043 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19044 return SDValue();
19045 break;
19046 }
19047 case ARMISD::SMULWT: {
19048 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19049 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19050 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19051 return SDValue();
19052 break;
19053 }
19054 case ARMISD::SMLALBB:
19055 case ARMISD::QADD16b:
19056 case ARMISD::QSUB16b:
19057 case ARMISD::UQADD16b:
19058 case ARMISD::UQSUB16b: {
19059 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19060 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19061 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19062 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19063 return SDValue();
19064 break;
19065 }
19066 case ARMISD::SMLALBT: {
19067 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19068 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19069 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19070 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19071 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19072 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19073 return SDValue();
19074 break;
19075 }
19076 case ARMISD::SMLALTB: {
19077 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19078 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19079 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19080 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19081 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19082 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19083 return SDValue();
19084 break;
19085 }
19086 case ARMISD::SMLALTT: {
19087 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19088 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19089 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19090 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19091 return SDValue();
19092 break;
19093 }
19094 case ARMISD::QADD8b:
19095 case ARMISD::QSUB8b:
19096 case ARMISD::UQADD8b:
19097 case ARMISD::UQSUB8b: {
19098 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19099 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19100 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19101 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19102 return SDValue();
19103 break;
19104 }
19105 case ARMISD::VBSP:
19106 if (N->getOperand(1) == N->getOperand(2))
19107 return N->getOperand(1);
19108 return SDValue();
19109   case ISD::INTRINSIC_VOID:
19110   case ISD::INTRINSIC_W_CHAIN: {
19111     switch (N->getConstantOperandVal(1)) {
19112 case Intrinsic::arm_neon_vld1:
19113 case Intrinsic::arm_neon_vld1x2:
19114 case Intrinsic::arm_neon_vld1x3:
19115 case Intrinsic::arm_neon_vld1x4:
19116 case Intrinsic::arm_neon_vld2:
19117 case Intrinsic::arm_neon_vld3:
19118 case Intrinsic::arm_neon_vld4:
19119 case Intrinsic::arm_neon_vld2lane:
19120 case Intrinsic::arm_neon_vld3lane:
19121 case Intrinsic::arm_neon_vld4lane:
19122 case Intrinsic::arm_neon_vld2dup:
19123 case Intrinsic::arm_neon_vld3dup:
19124 case Intrinsic::arm_neon_vld4dup:
19125 case Intrinsic::arm_neon_vst1:
19126 case Intrinsic::arm_neon_vst1x2:
19127 case Intrinsic::arm_neon_vst1x3:
19128 case Intrinsic::arm_neon_vst1x4:
19129 case Intrinsic::arm_neon_vst2:
19130 case Intrinsic::arm_neon_vst3:
19131 case Intrinsic::arm_neon_vst4:
19132 case Intrinsic::arm_neon_vst2lane:
19133 case Intrinsic::arm_neon_vst3lane:
19134 case Intrinsic::arm_neon_vst4lane:
19135 return PerformVLDCombine(N, DCI);
19136 case Intrinsic::arm_mve_vld2q:
19137 case Intrinsic::arm_mve_vld4q:
19138 case Intrinsic::arm_mve_vst2q:
19139 case Intrinsic::arm_mve_vst4q:
19140 return PerformMVEVLDCombine(N, DCI);
19141 default: break;
19142 }
19143 break;
19144 }
19145 return SDValue();
19146}
19147
19148 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19149                                                           EVT VT) const {
19150 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19151}
19152
19153 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19154                                                        Align Alignment,
19155                                                        MachineMemOperand::Flags,
19156                                                        unsigned *Fast) const {
19157 // Depends what it gets converted into if the type is weird.
19158 if (!VT.isSimple())
19159 return false;
19160
19161 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19162 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19163 auto Ty = VT.getSimpleVT().SimpleTy;
19164
19165 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19166     // Unaligned access can use (for example) LDRB, LDRH, LDR
19167 if (AllowsUnaligned) {
19168 if (Fast)
19169 *Fast = Subtarget->hasV7Ops();
19170 return true;
19171 }
19172 }
19173
19174 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19175 // For any little-endian targets with neon, we can support unaligned ld/st
19176 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19177 // A big-endian target may also explicitly support unaligned accesses
19178 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19179 if (Fast)
19180 *Fast = 1;
19181 return true;
19182 }
19183 }
19184
19185 if (!Subtarget->hasMVEIntegerOps())
19186 return false;
19187
19188 // These are for predicates
19189 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19190 Ty == MVT::v2i1)) {
19191 if (Fast)
19192 *Fast = 1;
19193 return true;
19194 }
19195
19196 // These are for truncated stores/narrowing loads. They are fine so long as
19197 // the alignment is at least the size of the item being loaded
19198 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19199 Alignment >= VT.getScalarSizeInBits() / 8) {
19200 if (Fast)
19201 *Fast = true;
19202 return true;
19203 }
19204
19205 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19206 // VSTRW.U32 all store the vector register in exactly the same format, and
19207 // differ only in the range of their immediate offset field and the required
19208 // alignment. So there is always a store that can be used, regardless of
19209 // actual type.
19210 //
19211   // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19212 // VREV64.8) pair and get the same effect. This will likely be better than
19213 // aligning the vector through the stack.
19214 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19215 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19216 Ty == MVT::v2f64) {
19217 if (Fast)
19218 *Fast = 1;
19219 return true;
19220 }
19221
19222 return false;
19223}
19224
19225
19226 EVT ARMTargetLowering::getOptimalMemOpType(
19227     const MemOp &Op, const AttributeList &FuncAttributes) const {
19228 // See if we can use NEON instructions for this...
19229 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19230 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19231 unsigned Fast;
19232 if (Op.size() >= 16 &&
19233 (Op.isAligned(Align(16)) ||
19234 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19235                                        MachineMemOperand::MONone, &Fast) &&
19236          Fast))) {
19237 return MVT::v2f64;
19238 } else if (Op.size() >= 8 &&
19239 (Op.isAligned(Align(8)) ||
19240                (allowsMisalignedMemoryAccesses(
19241                     MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19242 Fast))) {
19243 return MVT::f64;
19244 }
19245 }
19246
19247 // Let the target-independent logic figure it out.
19248 return MVT::Other;
19249}
19250
19251// 64-bit integers are split into their high and low parts and held in two
19252// different registers, so the trunc is free since the low register can just
19253// be used.
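// For example, truncating an i64 held in a GPR pair to i32 needs no code: the
// register already holding the low 32 bits is simply used directly.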
19254bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19255 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19256 return false;
19257 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19258 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19259 return (SrcBits == 64 && DestBits == 32);
19260}
19261
19262 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19263   if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19264 !DstVT.isInteger())
19265 return false;
19266 unsigned SrcBits = SrcVT.getSizeInBits();
19267 unsigned DestBits = DstVT.getSizeInBits();
19268 return (SrcBits == 64 && DestBits == 32);
19269}
19270
19271 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19272   if (Val.getOpcode() != ISD::LOAD)
19273 return false;
19274
19275 EVT VT1 = Val.getValueType();
19276 if (!VT1.isSimple() || !VT1.isInteger() ||
19277 !VT2.isSimple() || !VT2.isInteger())
19278 return false;
19279
19280 switch (VT1.getSimpleVT().SimpleTy) {
19281 default: break;
19282 case MVT::i1:
19283 case MVT::i8:
19284 case MVT::i16:
19285 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19286 return true;
19287 }
19288
19289 return false;
19290}
19291
19292 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19293   if (!VT.isSimple())
19294 return false;
19295
19296 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19297 // negate values directly (fneg is free). So, we don't want to let the DAG
19298 // combiner rewrite fneg into xors and some other instructions. For f16 and
19299 // FullFP16 argument passing, some bitcast nodes may be introduced,
19300 // triggering this DAG combine rewrite, so we are avoiding that with this.
19301 switch (VT.getSimpleVT().SimpleTy) {
19302 default: break;
19303 case MVT::f16:
19304 return Subtarget->hasFullFP16();
19305 }
19306
19307 return false;
19308}
19309
19310 Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19311 if (!Subtarget->hasMVEIntegerOps())
19312 return nullptr;
19313 Type *SVIType = SVI->getType();
19314 Type *ScalarType = SVIType->getScalarType();
19315
19316 if (ScalarType->isFloatTy())
19317 return Type::getInt32Ty(SVIType->getContext());
19318 if (ScalarType->isHalfTy())
19319 return Type::getInt16Ty(SVIType->getContext());
19320 return nullptr;
19321}
19322
19323 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19324 EVT VT = ExtVal.getValueType();
19325
19326 if (!isTypeLegal(VT))
19327 return false;
19328
19329 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19330 if (Ld->isExpandingLoad())
19331 return false;
19332 }
19333
19334 if (Subtarget->hasMVEIntegerOps())
19335 return true;
19336
19337 // Don't create a loadext if we can fold the extension into a wide/long
19338 // instruction.
19339 // If there's more than one user instruction, the loadext is desirable no
19340 // matter what. There can be two uses by the same instruction.
19341 if (ExtVal->use_empty() ||
19342 !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))
19343 return true;
19344
19345 SDNode *U = *ExtVal->user_begin();
19346 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19347 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19348 return false;
19349
19350 return true;
19351}
19352
19353 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19354 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19355 return false;
19356
19357 if (!isTypeLegal(EVT::getEVT(Ty1)))
19358 return false;
19359
19360 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19361
19362 // Assuming the caller doesn't have a zeroext or signext return parameter,
19363 // truncation all the way down to i1 is valid.
19364 return true;
19365}
19366
19367/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19368/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19369/// expanded to FMAs when this method returns true, otherwise fmuladd is
19370/// expanded to fmul + fadd.
19371///
19372/// ARM supports both fused and unfused multiply-add operations; we already
19373/// lower a pair of fmul and fadd to the latter so it's not clear that there
19374/// would be a gain or that the gain would be worthwhile enough to risk
19375/// correctness bugs.
19376///
19377/// For MVE, we set this to true as it helps simplify the need for some
19378/// patterns (and we don't have the non-fused floating point instruction).
19379bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19380 EVT VT) const {
19381 if (Subtarget->useSoftFloat())
19382 return false;
19383
19384 if (!VT.isSimple())
19385 return false;
19386
19387 switch (VT.getSimpleVT().SimpleTy) {
19388 case MVT::v4f32:
19389 case MVT::v8f16:
19390 return Subtarget->hasMVEFloatOps();
19391 case MVT::f16:
19392 return Subtarget->useFPVFMx16();
19393 case MVT::f32:
19394 return Subtarget->useFPVFMx();
19395 case MVT::f64:
19396 return Subtarget->useFPVFMx64();
19397 default:
19398 break;
19399 }
19400
19401 return false;
19402}
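// --- Editorial sketch, not part of the upstream file ---
// What the hook above decides: whether llvm.fmuladd(a, b, c) is expanded to
// one fused multiply-add (a single rounding step) or to a separate multiply
// and add (two rounding steps). In source-level terms the two expansions are:
#include <cmath> // editorial only: for std::fma
static double exampleFusedMulAdd(double a, double b, double c) {
  return std::fma(a, b, c); // one rounding, VFMA-style
}
static double exampleUnfusedMulAdd(double a, double b, double c) {
  return a * b + c;         // two roundings, VMUL + VADD (or VMLA)-style
}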
19403
19404static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19405 if (V < 0)
19406 return false;
19407
19408 unsigned Scale = 1;
19409 switch (VT.getSimpleVT().SimpleTy) {
19410 case MVT::i1:
19411 case MVT::i8:
19412 // Scale == 1;
19413 break;
19414 case MVT::i16:
19415 // Scale == 2;
19416 Scale = 2;
19417 break;
19418 default:
19419 // On Thumb1 we load most things (i32, i64, floats, etc.) with an LDR
19420 // Scale == 4;
19421 Scale = 4;
19422 break;
19423 }
19424
19425 if ((V & (Scale - 1)) != 0)
19426 return false;
19427 return isUInt<5>(V / Scale);
19428}
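// --- Editorial sketch, not part of the upstream file ---
// The Thumb1 rule above: the byte offset must be non-negative, a multiple of
// the access size, and fit the 5-bit scaled immediate of LDRB/LDRH/LDR. That
// allows offsets up to 31 for bytes, 62 for halfwords and 124 for words. A
// standalone restatement (names are illustrative):
static bool exampleThumb1OffsetOk(long long offset, unsigned scaleBytes) {
  if (offset < 0 || (offset % scaleBytes) != 0)
    return false;
  return (offset / scaleBytes) < 32; // unsigned 5-bit field
}
// exampleThumb1OffsetOk(124, 4) -> true, exampleThumb1OffsetOk(126, 4) -> false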
19429
19430static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19431 const ARMSubtarget *Subtarget) {
19432 if (!VT.isInteger() && !VT.isFloatingPoint())
19433 return false;
19434 if (VT.isVector() && Subtarget->hasNEON())
19435 return false;
19436 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19437 !Subtarget->hasMVEFloatOps())
19438 return false;
19439
19440 bool IsNeg = false;
19441 if (V < 0) {
19442 IsNeg = true;
19443 V = -V;
19444 }
19445
19446 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19447
19448 // MVE: size * imm7
19449 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19450 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19451 case MVT::i32:
19452 case MVT::f32:
19453 return isShiftedUInt<7,2>(V);
19454 case MVT::i16:
19455 case MVT::f16:
19456 return isShiftedUInt<7,1>(V);
19457 case MVT::i8:
19458 return isUInt<7>(V);
19459 default:
19460 return false;
19461 }
19462 }
19463
19464 // half VLDR: 2 * imm8
19465 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19466 return isShiftedUInt<8, 1>(V);
19467 // VLDR and LDRD: 4 * imm8
19468 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19469 return isShiftedUInt<8, 2>(V);
19470
19471 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19472 // + imm12 or - imm8
19473 if (IsNeg)
19474 return isUInt<8>(V);
19475 return isUInt<12>(V);
19476 }
19477
19478 return false;
19479}
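// --- Editorial sketch, not part of the upstream file ---
// The MVE branch above accepts offsets of the form "element size * imm7":
// the magnitude (the sign was stripped earlier in the function) must be a
// multiple of the element size and its scaled count must fit in 7 bits, so
// up to 127, 254 or 508 bytes for 8-, 16- and 32-bit elements. A standalone
// equivalent of the isShiftedUInt<7, Shift> checks:
static bool exampleMveImm7Offset(long long magnitude, unsigned elemBytes) {
  if (magnitude < 0 || (magnitude % elemBytes) != 0)
    return false;
  return (magnitude / elemBytes) < 128; // 7-bit unsigned count
}
// exampleMveImm7Offset(508, 4) -> true, exampleMveImm7Offset(510, 4) -> false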
19480
19481/// isLegalAddressImmediate - Return true if the integer value can be used
19482/// as the offset of the target addressing mode for load / store of the
19483/// given type.
19484static bool isLegalAddressImmediate(int64_t V, EVT VT,
19485 const ARMSubtarget *Subtarget) {
19486 if (V == 0)
19487 return true;
19488
19489 if (!VT.isSimple())
19490 return false;
19491
19492 if (Subtarget->isThumb1Only())
19493 return isLegalT1AddressImmediate(V, VT);
19494 else if (Subtarget->isThumb2())
19495 return isLegalT2AddressImmediate(V, VT, Subtarget);
19496
19497 // ARM mode.
19498 if (V < 0)
19499 V = - V;
19500 switch (VT.getSimpleVT().SimpleTy) {
19501 default: return false;
19502 case MVT::i1:
19503 case MVT::i8:
19504 case MVT::i32:
19505 // +- imm12
19506 return isUInt<12>(V);
19507 case MVT::i16:
19508 // +- imm8
19509 return isUInt<8>(V);
19510 case MVT::f32:
19511 case MVT::f64:
19512 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19513 return false;
19514 return isShiftedUInt<8, 2>(V);
19515 }
19516}
19517
19518 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19519 EVT VT) const {
19520 int Scale = AM.Scale;
19521 if (Scale < 0)
19522 return false;
19523
19524 switch (VT.getSimpleVT().SimpleTy) {
19525 default: return false;
19526 case MVT::i1:
19527 case MVT::i8:
19528 case MVT::i16:
19529 case MVT::i32:
19530 if (Scale == 1)
19531 return true;
19532 // r + r << imm
19533 Scale = Scale & ~1;
19534 return Scale == 2 || Scale == 4 || Scale == 8;
19535 case MVT::i64:
19536 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19537 // version in Thumb mode.
19538 // r + r
19539 if (Scale == 1)
19540 return true;
19541 // r * 2 (this can be lowered to r + r).
19542 if (!AM.HasBaseReg && Scale == 2)
19543 return true;
19544 return false;
19545 case MVT::isVoid:
19546 // Note, we allow "void" uses (basically, uses that aren't loads or
19547 // stores), because arm allows folding a scale into many arithmetic
19548 // operations. This should be made more precise and revisited later.
19549
19550 // Allow r << imm, but the imm has to be a multiple of two.
19551 if (Scale & 1) return false;
19552 return isPowerOf2_32(Scale);
19553 }
19554}
19555
19556 bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19557 EVT VT) const {
19558 const int Scale = AM.Scale;
19559
19560 // Negative scales are not supported in Thumb1.
19561 if (Scale < 0)
19562 return false;
19563
19564 // Thumb1 addressing modes do not support register scaling excepting the
19565 // following cases:
19566 // 1. Scale == 1 means no scaling.
19567 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19568 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19569}
19570
19571/// isLegalAddressingMode - Return true if the addressing mode represented
19572/// by AM is legal for this target, for a load/store of the specified type.
19573 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19574 const AddrMode &AM, Type *Ty,
19575 unsigned AS, Instruction *I) const {
19576 EVT VT = getValueType(DL, Ty, true);
19577 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19578 return false;
19579
19580 // Can never fold addr of global into load/store.
19581 if (AM.BaseGV)
19582 return false;
19583
19584 switch (AM.Scale) {
19585 case 0: // no scale reg, must be "r+i" or "r", or "i".
19586 break;
19587 default:
19588 // ARM doesn't support any R+R*scale+imm addr modes.
19589 if (AM.BaseOffs)
19590 return false;
19591
19592 if (!VT.isSimple())
19593 return false;
19594
19595 if (Subtarget->isThumb1Only())
19596 return isLegalT1ScaledAddressingMode(AM, VT);
19597
19598 if (Subtarget->isThumb2())
19599 return isLegalT2ScaledAddressingMode(AM, VT);
19600
19601 int Scale = AM.Scale;
19602 switch (VT.getSimpleVT().SimpleTy) {
19603 default: return false;
19604 case MVT::i1:
19605 case MVT::i8:
19606 case MVT::i32:
19607 if (Scale < 0) Scale = -Scale;
19608 if (Scale == 1)
19609 return true;
19610 // r + r << imm
19611 return isPowerOf2_32(Scale & ~1);
19612 case MVT::i16:
19613 case MVT::i64:
19614 // r +/- r
19615 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19616 return true;
19617 // r * 2 (this can be lowered to r + r).
19618 if (!AM.HasBaseReg && Scale == 2)
19619 return true;
19620 return false;
19621
19622 case MVT::isVoid:
19623 // Note, we allow "void" uses (basically, uses that aren't loads or
19624 // stores), because arm allows folding a scale into many arithmetic
19625 // operations. This should be made more precise and revisited later.
19626
19627 // Allow r << imm, but the imm has to be a multiple of two.
19628 if (Scale & 1) return false;
19629 return isPowerOf2_32(Scale);
19630 }
19631 }
19632 return true;
19633}
19634
19635/// isLegalICmpImmediate - Return true if the specified immediate is legal
19636/// icmp immediate, that is the target has icmp instructions which can compare
19637/// a register against the immediate without having to materialize the
19638/// immediate into a register.
19639 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19640 // Thumb2 and ARM modes can use cmn for negative immediates.
19641 if (!Subtarget->isThumb())
19642 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19643 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19644 if (Subtarget->isThumb2())
19645 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19646 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19647 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19648 return Imm >= 0 && Imm <= 255;
19649}
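// --- Editorial sketch, not part of the upstream file ---
// The CMN trick above: comparing a register against -42 can be encoded as
// CMN r0, #42, so the immediate is legal if either it or its negation is an
// encodable data-processing immediate. For the A32 case that encoding is an
// 8-bit value rotated right by an even amount, which is what getSOImmVal
// tests; a standalone approximation of the same idea:
static bool exampleIsA32ModImm(unsigned v) {
  for (unsigned rot = 0; rot < 32; rot += 2) {
    // Rotating left by `rot` undoes a rotate-right of the 8-bit payload.
    unsigned undone = rot ? ((v << rot) | (v >> (32 - rot))) : v;
    if (undone <= 0xFF)
      return true;
  }
  return false;
}
static bool exampleIcmpImmLegalA32(long long imm) {
  unsigned u = static_cast<unsigned>(imm);
  return exampleIsA32ModImm(u) || exampleIsA32ModImm(0u - u); // CMP or CMN
}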
19650
19651/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19652/// *or sub* immediate, that is the target has add or sub instructions which can
19653/// add a register with the immediate without having to materialize the
19654/// immediate into a register.
19655 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19656 // Same encoding for add/sub, just flip the sign.
19657 int64_t AbsImm = std::abs(Imm);
19658 if (!Subtarget->isThumb())
19659 return ARM_AM::getSOImmVal(AbsImm) != -1;
19660 if (Subtarget->isThumb2())
19661 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19662 // Thumb1 only has 8-bit unsigned immediate.
19663 return AbsImm >= 0 && AbsImm <= 255;
19664}
19665
19666// Return false to prevent folding
19667// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19668// if the folding leads to worse code.
19669 bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19670 SDValue ConstNode) const {
19671 // Let the DAGCombiner decide for vector types and large types.
19672 const EVT VT = AddNode.getValueType();
19673 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19674 return true;
19675
19676 // It is worse if c0 is legal add immediate, while c1*c0 is not
19677 // and has to be composed by at least two instructions.
19678 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19679 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19680 const int64_t C0 = C0Node->getSExtValue();
19681 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19682 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19683 return true;
19684 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19685 return false;
19686
19687 // Default to true and let the DAGCombiner decide.
19688 return true;
19689}
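// --- Editorial note, not part of the upstream file ---
// The fold being gated above rewrites (r + c0) * c1 into r*c1 + c0*c1, which
// only pays off if c0*c1 is still a cheap immediate. With concrete numbers:
//   (r + 8) * 10      ->  r*10 + 80     // 80 is trivially encodable, so the
//                                       // hook allows the fold.
//   (r + 100) * 54321                   // 100 is a legal add immediate, but
//                                       // 100 * 54321 = 5432100 cannot be
//                                       // encoded as an immediate and needs a
//                                       // two-instruction materialization
//                                       // (e.g. movw+movt), so the hook
//                                       // returns false and blocks the fold.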
19690
19691 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19692 bool isSEXTLoad, SDValue &Base,
19693 SDValue &Offset, bool &isInc,
19694 SelectionDAG &DAG) {
19695 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19696 return false;
19697
19698 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19699 // AddressingMode 3
19700 Base = Ptr->getOperand(0);
19701 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19702 int RHSC = (int)RHS->getZExtValue();
19703 if (RHSC < 0 && RHSC > -256) {
19704 assert(Ptr->getOpcode() == ISD::ADD);
19705 isInc = false;
19706 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19707 return true;
19708 }
19709 }
19710 isInc = (Ptr->getOpcode() == ISD::ADD);
19711 Offset = Ptr->getOperand(1);
19712 return true;
19713 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19714 // AddressingMode 2
19715 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19716 int RHSC = (int)RHS->getZExtValue();
19717 if (RHSC < 0 && RHSC > -0x1000) {
19718 assert(Ptr->getOpcode() == ISD::ADD);
19719 isInc = false;
19720 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19721 Base = Ptr->getOperand(0);
19722 return true;
19723 }
19724 }
19725
19726 if (Ptr->getOpcode() == ISD::ADD) {
19727 isInc = true;
19728 ARM_AM::ShiftOpc ShOpcVal=
19729 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19730 if (ShOpcVal != ARM_AM::no_shift) {
19731 Base = Ptr->getOperand(1);
19732 Offset = Ptr->getOperand(0);
19733 } else {
19734 Base = Ptr->getOperand(0);
19735 Offset = Ptr->getOperand(1);
19736 }
19737 return true;
19738 }
19739
19740 isInc = (Ptr->getOpcode() == ISD::ADD);
19741 Base = Ptr->getOperand(0);
19742 Offset = Ptr->getOperand(1);
19743 return true;
19744 }
19745
19746 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19747 return false;
19748}
19749
19750 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19751 bool isSEXTLoad, SDValue &Base,
19752 SDValue &Offset, bool &isInc,
19753 SelectionDAG &DAG) {
19754 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19755 return false;
19756
19757 Base = Ptr->getOperand(0);
19758 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19759 int RHSC = (int)RHS->getZExtValue();
19760 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19761 assert(Ptr->getOpcode() == ISD::ADD);
19762 isInc = false;
19763 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19764 return true;
19765 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19766 isInc = Ptr->getOpcode() == ISD::ADD;
19767 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19768 return true;
19769 }
19770 }
19771
19772 return false;
19773}
19774
19775static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19776 bool isSEXTLoad, bool IsMasked, bool isLE,
19777 SDValue &Base, SDValue &Offset,
19778 bool &isInc, SelectionDAG &DAG) {
19779 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19780 return false;
19781 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19782 return false;
19783
19784 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19785 // as opposed to a vldrw.32). This can allow extra addressing modes or
19786 // alignments for what is otherwise an equivalent instruction.
19787 bool CanChangeType = isLE && !IsMasked;
19788
19789 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19790 int RHSC = (int)RHS->getZExtValue();
19791
19792 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19793 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19794 assert(Ptr->getOpcode() == ISD::ADD);
19795 isInc = false;
19796 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19797 return true;
19798 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19799 isInc = Ptr->getOpcode() == ISD::ADD;
19800 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19801 return true;
19802 }
19803 return false;
19804 };
19805
19806 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19807 // (in BE/masked) type.
19808 Base = Ptr->getOperand(0);
19809 if (VT == MVT::v4i16) {
19810 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19811 return true;
19812 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19813 if (IsInRange(RHSC, 0x80, 1))
19814 return true;
19815 } else if (Alignment >= 4 &&
19816 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19817 IsInRange(RHSC, 0x80, 4))
19818 return true;
19819 else if (Alignment >= 2 &&
19820 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19821 IsInRange(RHSC, 0x80, 2))
19822 return true;
19823 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19824 return true;
19825 return false;
19826}
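// --- Editorial sketch, not part of the upstream file ---
// The IsInRange lambda above accepts a non-zero offset that is a multiple of
// the element size and whose scaled count fits the signed 7-bit immediate of
// the pre/post-indexed MVE loads and stores (every caller passes Limit =
// 0x80). A standalone equivalent:
static bool exampleMveIndexedOffsetOk(int offsetBytes, int scaleBytes) {
  if (offsetBytes == 0 || offsetBytes % scaleBytes != 0)
    return false;
  int count = offsetBytes / scaleBytes;
  return count > -0x80 && count < 0x80;
}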
19827
19828/// getPreIndexedAddressParts - returns true by value, base pointer and
19829/// offset pointer and addressing mode by reference if the node's address
19830/// can be legally represented as pre-indexed load / store address.
19831bool
19832 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19833 SDValue &Offset,
19834 ISD::MemIndexedMode &AM,
19835 SelectionDAG &DAG) const {
19836 if (Subtarget->isThumb1Only())
19837 return false;
19838
19839 EVT VT;
19840 SDValue Ptr;
19841 Align Alignment;
19842 bool isSEXTLoad = false;
19843 bool IsMasked = false;
19844 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19845 Ptr = LD->getBasePtr();
19846 VT = LD->getMemoryVT();
19847 Alignment = LD->getAlign();
19848 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19849 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19850 Ptr = ST->getBasePtr();
19851 VT = ST->getMemoryVT();
19852 Alignment = ST->getAlign();
19853 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19854 Ptr = LD->getBasePtr();
19855 VT = LD->getMemoryVT();
19856 Alignment = LD->getAlign();
19857 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19858 IsMasked = true;
19859 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19860 Ptr = ST->getBasePtr();
19861 VT = ST->getMemoryVT();
19862 Alignment = ST->getAlign();
19863 IsMasked = true;
19864 } else
19865 return false;
19866
19867 bool isInc;
19868 bool isLegal = false;
19869 if (VT.isVector())
19870 isLegal = Subtarget->hasMVEIntegerOps() &&
19871 getMVEIndexedAddressParts(
19872 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19873 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19874 else {
19875 if (Subtarget->isThumb2())
19876 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19877 Offset, isInc, DAG);
19878 else
19879 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19880 Offset, isInc, DAG);
19881 }
19882 if (!isLegal)
19883 return false;
19884
19885 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19886 return true;
19887}
19888
19889/// getPostIndexedAddressParts - returns true by value, base pointer and
19890/// offset pointer and addressing mode by reference if this node can be
19891/// combined with a load / store to form a post-indexed load / store.
19892 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19893 SDValue &Base,
19894 SDValue &Offset,
19895 ISD::MemIndexedMode &AM,
19896 SelectionDAG &DAG) const {
19897 EVT VT;
19898 SDValue Ptr;
19899 Align Alignment;
19900 bool isSEXTLoad = false, isNonExt;
19901 bool IsMasked = false;
19902 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19903 VT = LD->getMemoryVT();
19904 Ptr = LD->getBasePtr();
19905 Alignment = LD->getAlign();
19906 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19907 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19908 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19909 VT = ST->getMemoryVT();
19910 Ptr = ST->getBasePtr();
19911 Alignment = ST->getAlign();
19912 isNonExt = !ST->isTruncatingStore();
19913 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19914 VT = LD->getMemoryVT();
19915 Ptr = LD->getBasePtr();
19916 Alignment = LD->getAlign();
19917 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19918 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19919 IsMasked = true;
19920 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19921 VT = ST->getMemoryVT();
19922 Ptr = ST->getBasePtr();
19923 Alignment = ST->getAlign();
19924 isNonExt = !ST->isTruncatingStore();
19925 IsMasked = true;
19926 } else
19927 return false;
19928
19929 if (Subtarget->isThumb1Only()) {
19930 // Thumb-1 can do a limited post-inc load or store as an updating LDM. The
19931 // access must be a non-extending/non-truncating i32 with an offset of 4.
19932 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19933 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19934 return false;
19935 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19936 if (!RHS || RHS->getZExtValue() != 4)
19937 return false;
19938 if (Alignment < Align(4))
19939 return false;
19940
19941 Offset = Op->getOperand(1);
19942 Base = Op->getOperand(0);
19943 AM = ISD::POST_INC;
19944 return true;
19945 }
19946
19947 bool isInc;
19948 bool isLegal = false;
19949 if (VT.isVector())
19950 isLegal = Subtarget->hasMVEIntegerOps() &&
19951 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19952 Subtarget->isLittle(), Base, Offset,
19953 isInc, DAG);
19954 else {
19955 if (Subtarget->isThumb2())
19956 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19957 isInc, DAG);
19958 else
19959 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19960 isInc, DAG);
19961 }
19962 if (!isLegal)
19963 return false;
19964
19965 if (Ptr != Base) {
19966 // Swap base ptr and offset to catch more post-index load / store when
19967 // it's legal. In Thumb2 mode, offset must be an immediate.
19968 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19969 !Subtarget->isThumb2())
19970 std::swap(Base, Offset);
19971
19972 // Post-indexed load / store update the base pointer.
19973 if (Ptr != Base)
19974 return false;
19975 }
19976
19977 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19978 return true;
19979}
19980
19981 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
19982 KnownBits &Known,
19983 const APInt &DemandedElts,
19984 const SelectionDAG &DAG,
19985 unsigned Depth) const {
19986 unsigned BitWidth = Known.getBitWidth();
19987 Known.resetAll();
19988 switch (Op.getOpcode()) {
19989 default: break;
19990 case ARMISD::ADDC:
19991 case ARMISD::ADDE:
19992 case ARMISD::SUBC:
19993 case ARMISD::SUBE:
19994 // Special cases when we convert a carry to a boolean.
19995 if (Op.getResNo() == 0) {
19996 SDValue LHS = Op.getOperand(0);
19997 SDValue RHS = Op.getOperand(1);
19998 // (ADDE 0, 0, C) will give us a single bit.
19999 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20000 isNullConstant(RHS)) {
20001 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20002 return;
20003 }
20004 }
20005 break;
20006 case ARMISD::CMOV: {
20007 // Bits are known zero/one if known on the LHS and RHS.
20008 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20009 if (Known.isUnknown())
20010 return;
20011
20012 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20013 Known = Known.intersectWith(KnownRHS);
20014 return;
20015 }
20016 case ISD::INTRINSIC_W_CHAIN: {
20017 Intrinsic::ID IntID =
20018 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20019 switch (IntID) {
20020 default: return;
20021 case Intrinsic::arm_ldaex:
20022 case Intrinsic::arm_ldrex: {
20023 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20024 unsigned MemBits = VT.getScalarSizeInBits();
20025 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20026 return;
20027 }
20028 }
20029 }
20030 case ARMISD::BFI: {
20031 // Conservatively, we can recurse down the first operand
20032 // and just mask out all affected bits.
20033 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20034
20035 // The operand to BFI is already a mask suitable for removing the bits it
20036 // sets.
20037 const APInt &Mask = Op.getConstantOperandAPInt(2);
20038 Known.Zero &= Mask;
20039 Known.One &= Mask;
20040 return;
20041 }
20042 case ARMISD::VGETLANEs:
20043 case ARMISD::VGETLANEu: {
20044 const SDValue &SrcSV = Op.getOperand(0);
20045 EVT VecVT = SrcSV.getValueType();
20046 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20047 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20048 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20049 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20050 "VGETLANE index out of bounds");
20051 unsigned Idx = Pos->getZExtValue();
20052 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20053 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20054
20055 EVT VT = Op.getValueType();
20056 const unsigned DstSz = VT.getScalarSizeInBits();
20057 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20058 (void)SrcSz;
20059 assert(SrcSz == Known.getBitWidth());
20060 assert(DstSz > SrcSz);
20061 if (Op.getOpcode() == ARMISD::VGETLANEs)
20062 Known = Known.sext(DstSz);
20063 else {
20064 Known = Known.zext(DstSz);
20065 }
20066 assert(DstSz == Known.getBitWidth());
20067 break;
20068 }
20069 case ARMISD::VMOVrh: {
20070 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20071 assert(KnownOp.getBitWidth() == 16);
20072 Known = KnownOp.zext(32);
20073 break;
20074 }
20075 case ARMISD::CSINC:
20076 case ARMISD::CSINV:
20077 case ARMISD::CSNEG: {
20078 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20079 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20080
20081 // The result is either:
20082 // CSINC: KnownOp0 or KnownOp1 + 1
20083 // CSINV: KnownOp0 or ~KnownOp1
20084 // CSNEG: KnownOp0 or KnownOp1 * -1
20085 if (Op.getOpcode() == ARMISD::CSINC)
20086 KnownOp1 =
20087 KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20088 else if (Op.getOpcode() == ARMISD::CSINV)
20089 std::swap(KnownOp1.Zero, KnownOp1.One);
20090 else if (Op.getOpcode() == ARMISD::CSNEG)
20091 KnownOp1 = KnownBits::mul(KnownOp1,
20093
20094 Known = KnownOp0.intersectWith(KnownOp1);
20095 break;
20096 }
20097 }
20098}
20099
20100 bool ARMTargetLowering::targetShrinkDemandedConstant(
20101 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20102 TargetLoweringOpt &TLO) const {
20103 // Delay optimization, so we don't have to deal with illegal types, or block
20104 // optimizations.
20105 if (!TLO.LegalOps)
20106 return false;
20107
20108 // Only optimize AND for now.
20109 if (Op.getOpcode() != ISD::AND)
20110 return false;
20111
20112 EVT VT = Op.getValueType();
20113
20114 // Ignore vectors.
20115 if (VT.isVector())
20116 return false;
20117
20118 assert(VT == MVT::i32 && "Unexpected integer type");
20119
20120 // Make sure the RHS really is a constant.
20121 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20122 if (!C)
20123 return false;
20124
20125 unsigned Mask = C->getZExtValue();
20126
20127 unsigned Demanded = DemandedBits.getZExtValue();
20128 unsigned ShrunkMask = Mask & Demanded;
20129 unsigned ExpandedMask = Mask | ~Demanded;
20130
20131 // If the mask is all zeros, let the target-independent code replace the
20132 // result with zero.
20133 if (ShrunkMask == 0)
20134 return false;
20135
20136 // If the mask is all ones, erase the AND. (Currently, the target-independent
20137 // code won't do this, so we have to do it explicitly to avoid an infinite
20138 // loop in obscure cases.)
20139 if (ExpandedMask == ~0U)
20140 return TLO.CombineTo(Op, Op.getOperand(0));
20141
20142 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20143 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20144 };
20145 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20146 if (NewMask == Mask)
20147 return true;
20148 SDLoc DL(Op);
20149 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20150 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20151 return TLO.CombineTo(Op, NewOp);
20152 };
20153
20154 // Prefer uxtb mask.
20155 if (IsLegalMask(0xFF))
20156 return UseMask(0xFF);
20157
20158 // Prefer uxth mask.
20159 if (IsLegalMask(0xFFFF))
20160 return UseMask(0xFFFF);
20161
20162 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20163 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20164 if (ShrunkMask < 256)
20165 return UseMask(ShrunkMask);
20166
20167 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20168 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20169 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20170 return UseMask(ExpandedMask);
20171
20172 // Potential improvements:
20173 //
20174 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20175 // We could try to prefer Thumb1 immediates which can be lowered to a
20176 // two-instruction sequence.
20177 // We could try to recognize more legal ARM/Thumb2 immediates here.
20178
20179 return false;
20180}
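// --- Editorial note, not part of the upstream file ---
// The transformation above: for a given AND mask and set of demanded bits,
// any mask between (Mask & Demanded) and (Mask | ~Demanded) produces the same
// demanded result, so the code picks one that is cheap to encode (0xFF for
// UXTB, 0xFFFF for UXTH, a small constant for MOVS+ANDS, or a constant whose
// complement is small for BICS). Worked example:
//   Mask = 0x00FF00FF, DemandedBits = 0x0000FFFF
//   ShrunkMask   = Mask &  Demanded = 0x000000FF
//   ExpandedMask = Mask | ~Demanded = 0xFFFF00FF
//   0xFF lies between the two, so the AND can be rewritten to use 0xFF and
//   later selected as a single UXTB.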
20181
20182 bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20183 SDValue Op, const APInt &OriginalDemandedBits,
20184 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20185 unsigned Depth) const {
20186 unsigned Opc = Op.getOpcode();
20187
20188 switch (Opc) {
20189 case ARMISD::ASRL:
20190 case ARMISD::LSRL: {
20191 // If this is result 0 and the other result is unused, see if the demand
20192 // bits allow us to shrink this long shift into a standard small shift in
20193 // the opposite direction.
20194 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20195 isa<ConstantSDNode>(Op->getOperand(2))) {
20196 unsigned ShAmt = Op->getConstantOperandVal(2);
20197 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20198 << (32 - ShAmt)))
20199 return TLO.CombineTo(
20200 Op, TLO.DAG.getNode(
20201 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20202 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20203 }
20204 break;
20205 }
20206 case ARMISD::VBICIMM: {
20207 SDValue Op0 = Op.getOperand(0);
20208 unsigned ModImm = Op.getConstantOperandVal(1);
20209 unsigned EltBits = 0;
20210 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20211 if ((OriginalDemandedBits & Mask) == 0)
20212 return TLO.CombineTo(Op, Op0);
20213 }
20214 }
20215
20216 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20217 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20218}
20219
20220//===----------------------------------------------------------------------===//
20221// ARM Inline Assembly Support
20222//===----------------------------------------------------------------------===//
20223
20224 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20225 // Looking for "rev", which requires ARMv6 or later.
20226 if (!Subtarget->hasV6Ops())
20227 return false;
20228
20229 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20230 StringRef AsmStr = IA->getAsmString();
20231 SmallVector<StringRef, 4> AsmPieces;
20232 SplitString(AsmStr, AsmPieces, ";\n");
20233
20234 switch (AsmPieces.size()) {
20235 default: return false;
20236 case 1:
20237 AsmStr = AsmPieces[0];
20238 AsmPieces.clear();
20239 SplitString(AsmStr, AsmPieces, " \t,");
20240
20241 // rev $0, $1
20242 if (AsmPieces.size() == 3 &&
20243 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20244 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20245 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20246 if (Ty && Ty->getBitWidth() == 32)
20247 return IntrinsicLowering::LowerToByteSwap(CI);
20248 }
20249 break;
20250 }
20251
20252 return false;
20253}
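// --- Editorial sketch, not part of the upstream file ---
// The pattern matched above lets inline assembly of the form
//   asm("rev $0, $1" : "=l"(out) : "l"(in))
// be replaced with a generic 32-bit byte swap, which the backend selects to
// REV anyway. The equivalent byte swap in portable C++:
static unsigned exampleByteSwap32(unsigned v) {
  return ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) |
         ((v & 0x00FF0000u) >> 8) | ((v & 0xFF000000u) >> 24);
}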
20254
20255const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20256 // At this point, we have to lower this constraint to something else, so we
20257 // lower it to an "r" or "w". However, by doing this we will force the result
20258 // to be in a register, while the X constraint is much more permissive.
20259 //
20260 // Although we are correct (we are free to emit anything, without
20261 // constraints), we might break use cases that would expect us to be more
20262 // efficient and emit something else.
20263 if (!Subtarget->hasVFP2Base())
20264 return "r";
20265 if (ConstraintVT.isFloatingPoint())
20266 return "w";
20267 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20268 (ConstraintVT.getSizeInBits() == 64 ||
20269 ConstraintVT.getSizeInBits() == 128))
20270 return "w";
20271
20272 return "r";
20273}
20274
20275/// getConstraintType - Given a constraint letter, return the type of
20276/// constraint it is for this target.
20277 ARMTargetLowering::ConstraintType
20278 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20279 unsigned S = Constraint.size();
20280 if (S == 1) {
20281 switch (Constraint[0]) {
20282 default: break;
20283 case 'l': return C_RegisterClass;
20284 case 'w': return C_RegisterClass;
20285 case 'h': return C_RegisterClass;
20286 case 'x': return C_RegisterClass;
20287 case 't': return C_RegisterClass;
20288 case 'j': return C_Immediate; // Constant for movw.
20289 // An address with a single base register. Due to the way we
20290 // currently handle addresses it is the same as an 'r' memory constraint.
20291 case 'Q': return C_Memory;
20292 }
20293 } else if (S == 2) {
20294 switch (Constraint[0]) {
20295 default: break;
20296 case 'T': return C_RegisterClass;
20297 // All 'U+' constraints are addresses.
20298 case 'U': return C_Memory;
20299 }
20300 }
20301 return TargetLowering::getConstraintType(Constraint);
20302}
20303
20304/// Examine constraint type and operand type and determine a weight value.
20305/// This object must already have been set up with the operand type
20306/// and the current alternative constraint selected.
20309 AsmOperandInfo &info, const char *constraint) const {
20311 Value *CallOperandVal = info.CallOperandVal;
20312 // If we don't have a value, we can't do a match,
20313 // but allow it at the lowest weight.
20314 if (!CallOperandVal)
20315 return CW_Default;
20316 Type *type = CallOperandVal->getType();
20317 // Look at the constraint type.
20318 switch (*constraint) {
20319 default:
20321 break;
20322 case 'l':
20323 if (type->isIntegerTy()) {
20324 if (Subtarget->isThumb())
20325 weight = CW_SpecificReg;
20326 else
20327 weight = CW_Register;
20328 }
20329 break;
20330 case 'w':
20331 if (type->isFloatingPointTy())
20332 weight = CW_Register;
20333 break;
20334 }
20335 return weight;
20336}
20337
20338using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20339
20339
20340 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20341 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20342 switch (Constraint.size()) {
20343 case 1:
20344 // GCC ARM Constraint Letters
20345 switch (Constraint[0]) {
20346 case 'l': // Low regs or general regs.
20347 if (Subtarget->isThumb())
20348 return RCPair(0U, &ARM::tGPRRegClass);
20349 return RCPair(0U, &ARM::GPRRegClass);
20350 case 'h': // High regs or no regs.
20351 if (Subtarget->isThumb())
20352 return RCPair(0U, &ARM::hGPRRegClass);
20353 break;
20354 case 'r':
20355 if (Subtarget->isThumb1Only())
20356 return RCPair(0U, &ARM::tGPRRegClass);
20357 return RCPair(0U, &ARM::GPRRegClass);
20358 case 'w':
20359 if (VT == MVT::Other)
20360 break;
20361 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20362 return RCPair(0U, &ARM::SPRRegClass);
20363 if (VT.getSizeInBits() == 64)
20364 return RCPair(0U, &ARM::DPRRegClass);
20365 if (VT.getSizeInBits() == 128)
20366 return RCPair(0U, &ARM::QPRRegClass);
20367 break;
20368 case 'x':
20369 if (VT == MVT::Other)
20370 break;
20371 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20372 return RCPair(0U, &ARM::SPR_8RegClass);
20373 if (VT.getSizeInBits() == 64)
20374 return RCPair(0U, &ARM::DPR_8RegClass);
20375 if (VT.getSizeInBits() == 128)
20376 return RCPair(0U, &ARM::QPR_8RegClass);
20377 break;
20378 case 't':
20379 if (VT == MVT::Other)
20380 break;
20381 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20382 return RCPair(0U, &ARM::SPRRegClass);
20383 if (VT.getSizeInBits() == 64)
20384 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20385 if (VT.getSizeInBits() == 128)
20386 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20387 break;
20388 }
20389 break;
20390
20391 case 2:
20392 if (Constraint[0] == 'T') {
20393 switch (Constraint[1]) {
20394 default:
20395 break;
20396 case 'e':
20397 return RCPair(0U, &ARM::tGPREvenRegClass);
20398 case 'o':
20399 return RCPair(0U, &ARM::tGPROddRegClass);
20400 }
20401 }
20402 break;
20403
20404 default:
20405 break;
20406 }
20407
20408 if (StringRef("{cc}").equals_insensitive(Constraint))
20409 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20410
20411 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20412}
20413
20414/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20415/// vector. If it is invalid, don't add anything to Ops.
20416 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20417 StringRef Constraint,
20418 std::vector<SDValue> &Ops,
20419 SelectionDAG &DAG) const {
20420 SDValue Result;
20421
20422 // Currently only support length 1 constraints.
20423 if (Constraint.size() != 1)
20424 return;
20425
20426 char ConstraintLetter = Constraint[0];
20427 switch (ConstraintLetter) {
20428 default: break;
20429 case 'j':
20430 case 'I': case 'J': case 'K': case 'L':
20431 case 'M': case 'N': case 'O':
20432 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20433 if (!C)
20434 return;
20435
20436 int64_t CVal64 = C->getSExtValue();
20437 int CVal = (int) CVal64;
20438 // None of these constraints allow values larger than 32 bits. Check
20439 // that the value fits in an int.
20440 if (CVal != CVal64)
20441 return;
20442
20443 switch (ConstraintLetter) {
20444 case 'j':
20445 // Constant suitable for movw, must be between 0 and
20446 // 65535.
20447 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20448 if (CVal >= 0 && CVal <= 65535)
20449 break;
20450 return;
20451 case 'I':
20452 if (Subtarget->isThumb1Only()) {
20453 // This must be a constant between 0 and 255, for ADD
20454 // immediates.
20455 if (CVal >= 0 && CVal <= 255)
20456 break;
20457 } else if (Subtarget->isThumb2()) {
20458 // A constant that can be used as an immediate value in a
20459 // data-processing instruction.
20460 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20461 break;
20462 } else {
20463 // A constant that can be used as an immediate value in a
20464 // data-processing instruction.
20465 if (ARM_AM::getSOImmVal(CVal) != -1)
20466 break;
20467 }
20468 return;
20469
20470 case 'J':
20471 if (Subtarget->isThumb1Only()) {
20472 // This must be a constant between -255 and -1, for negated ADD
20473 // immediates. This can be used in GCC with an "n" modifier that
20474 // prints the negated value, for use with SUB instructions. It is
20475 // not useful otherwise but is implemented for compatibility.
20476 if (CVal >= -255 && CVal <= -1)
20477 break;
20478 } else {
20479 // This must be a constant between -4095 and 4095. It is not clear
20480 // what this constraint is intended for. Implemented for
20481 // compatibility with GCC.
20482 if (CVal >= -4095 && CVal <= 4095)
20483 break;
20484 }
20485 return;
20486
20487 case 'K':
20488 if (Subtarget->isThumb1Only()) {
20489 // A 32-bit value where only one byte has a nonzero value. Exclude
20490 // zero to match GCC. This constraint is used by GCC internally for
20491 // constants that can be loaded with a move/shift combination.
20492 // It is not useful otherwise but is implemented for compatibility.
20493 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20494 break;
20495 } else if (Subtarget->isThumb2()) {
20496 // A constant whose bitwise inverse can be used as an immediate
20497 // value in a data-processing instruction. This can be used in GCC
20498 // with a "B" modifier that prints the inverted value, for use with
20499 // BIC and MVN instructions. It is not useful otherwise but is
20500 // implemented for compatibility.
20501 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20502 break;
20503 } else {
20504 // A constant whose bitwise inverse can be used as an immediate
20505 // value in a data-processing instruction. This can be used in GCC
20506 // with a "B" modifier that prints the inverted value, for use with
20507 // BIC and MVN instructions. It is not useful otherwise but is
20508 // implemented for compatibility.
20509 if (ARM_AM::getSOImmVal(~CVal) != -1)
20510 break;
20511 }
20512 return;
20513
20514 case 'L':
20515 if (Subtarget->isThumb1Only()) {
20516 // This must be a constant between -7 and 7,
20517 // for 3-operand ADD/SUB immediate instructions.
20518 if (CVal >= -7 && CVal < 7)
20519 break;
20520 } else if (Subtarget->isThumb2()) {
20521 // A constant whose negation can be used as an immediate value in a
20522 // data-processing instruction. This can be used in GCC with an "n"
20523 // modifier that prints the negated value, for use with SUB
20524 // instructions. It is not useful otherwise but is implemented for
20525 // compatibility.
20526 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20527 break;
20528 } else {
20529 // A constant whose negation can be used as an immediate value in a
20530 // data-processing instruction. This can be used in GCC with an "n"
20531 // modifier that prints the negated value, for use with SUB
20532 // instructions. It is not useful otherwise but is implemented for
20533 // compatibility.
20534 if (ARM_AM::getSOImmVal(-CVal) != -1)
20535 break;
20536 }
20537 return;
20538
20539 case 'M':
20540 if (Subtarget->isThumb1Only()) {
20541 // This must be a multiple of 4 between 0 and 1020, for
20542 // ADD sp + immediate.
20543 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20544 break;
20545 } else {
20546 // A power of two or a constant between 0 and 32. This is used in
20547 // GCC for the shift amount on shifted register operands, but it is
20548 // useful in general for any shift amounts.
20549 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20550 break;
20551 }
20552 return;
20553
20554 case 'N':
20555 if (Subtarget->isThumb1Only()) {
20556 // This must be a constant between 0 and 31, for shift amounts.
20557 if (CVal >= 0 && CVal <= 31)
20558 break;
20559 }
20560 return;
20561
20562 case 'O':
20563 if (Subtarget->isThumb1Only()) {
20564 // This must be a multiple of 4 between -508 and 508, for
20565 // ADD/SUB sp = sp + immediate.
20566 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20567 break;
20568 }
20569 return;
20570 }
20571 Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20572 break;
20573 }
20574
20575 if (Result.getNode()) {
20576 Ops.push_back(Result);
20577 return;
20578 }
20579 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20580}
20581
20582 static RTLIB::Libcall getDivRemLibcall(
20583 const SDNode *N, MVT::SimpleValueType SVT) {
20584 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20585 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20586 "Unhandled Opcode in getDivRemLibcall");
20587 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20588 N->getOpcode() == ISD::SREM;
20589 RTLIB::Libcall LC;
20590 switch (SVT) {
20591 default: llvm_unreachable("Unexpected request for libcall!");
20592 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20593 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20594 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20595 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20596 }
20597 return LC;
20598}
20599
20600 static TargetLowering::ArgListTy getDivRemArgList(
20601 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20602 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20603 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20604 "Unhandled Opcode in getDivRemArgList");
20605 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20606 N->getOpcode() == ISD::SREM;
20607 TargetLowering::ArgListTy Args;
20608 TargetLowering::ArgListEntry Entry;
20609 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20610 EVT ArgVT = N->getOperand(i).getValueType();
20611 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20612 Entry.Node = N->getOperand(i);
20613 Entry.Ty = ArgTy;
20614 Entry.IsSExt = isSigned;
20615 Entry.IsZExt = !isSigned;
20616 Args.push_back(Entry);
20617 }
20618 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20619 std::swap(Args[0], Args[1]);
20620 return Args;
20621}
20622
20623SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20624 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20625 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20626 Subtarget->isTargetWindows()) &&
20627 "Register-based DivRem lowering only");
20628 unsigned Opcode = Op->getOpcode();
20629 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20630 "Invalid opcode for Div/Rem lowering");
20631 bool isSigned = (Opcode == ISD::SDIVREM);
20632 EVT VT = Op->getValueType(0);
20633 SDLoc dl(Op);
20634
20635 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20636 SmallVector<SDValue> Result;
20637 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20638 SDValue Res0 =
20639 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20640 SDValue Res1 =
20641 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20642 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20643 {Res0, Res1});
20644 }
20645 }
20646
20647 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20648
20649 // If the target has hardware divide, use divide + multiply + subtract:
20650 // div = a / b
20651 // rem = a - b * div
20652 // return {div, rem}
20653 // This should be lowered into UDIV/SDIV + MLS later on.
20654 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20655 : Subtarget->hasDivideInARMMode();
20656 if (hasDivide && Op->getValueType(0).isSimple() &&
20657 Op->getSimpleValueType(0) == MVT::i32) {
20658 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20659 const SDValue Dividend = Op->getOperand(0);
20660 const SDValue Divisor = Op->getOperand(1);
20661 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20662 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20663 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20664
20665 SDValue Values[2] = {Div, Rem};
20666 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20667 }
20668
20669 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20670 VT.getSimpleVT().SimpleTy);
20671 SDValue InChain = DAG.getEntryNode();
20672
20674 DAG.getContext(),
20675 Subtarget);
20676
20679
20680 Type *RetTy = StructType::get(Ty, Ty);
20681
20682 if (Subtarget->isTargetWindows())
20683 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20684
20685 TargetLowering::CallLoweringInfo CLI(DAG);
20686 CLI.setDebugLoc(dl).setChain(InChain)
20687 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20689
20690 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20691 return CallInfo.first;
20692}
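// --- Editorial sketch, not part of the upstream file ---
// The hardware-divide path above computes divrem as
//   div = a / b;  rem = a - b * div;
// which is later selected as UDIV/SDIV followed by MLS. A standalone
// equivalent over plain 32-bit integers:
static void exampleDivRem(int a, int b, int &div, int &rem) {
  div = a / b;       // SDIV
  rem = a - b * div; // MLS (multiply and subtract)
}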
20693
20694// Lowers REM using divmod helpers
20695// see RTABI section 4.2/4.3
20696SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20697 EVT VT = N->getValueType(0);
20698
20699 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20700 SmallVector<SDValue> Result;
20701 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20702 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20703 Result[0], Result[1]);
20704 }
20705
20706 // Build return types (div and rem)
20707 std::vector<Type*> RetTyParams;
20708 Type *RetTyElement;
20709
20710 switch (VT.getSimpleVT().SimpleTy) {
20711 default: llvm_unreachable("Unexpected request for libcall!");
20712 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20713 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20714 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20715 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20716 }
20717
20718 RetTyParams.push_back(RetTyElement);
20719 RetTyParams.push_back(RetTyElement);
20720 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20721 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20722
20723 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20724 SimpleTy);
20725 SDValue InChain = DAG.getEntryNode();
20727 Subtarget);
20728 bool isSigned = N->getOpcode() == ISD::SREM;
20731
20732 if (Subtarget->isTargetWindows())
20733 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20734
20735 // Lower call
20736 CallLoweringInfo CLI(DAG);
20737 CLI.setChain(InChain)
20738 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20740 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20741
20742 // Return second (rem) result operand (first contains div)
20743 SDNode *ResNode = CallResult.first.getNode();
20744 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20745 return ResNode->getOperand(1);
20746}
20747
20748SDValue
20749ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20750 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20751 SDLoc DL(Op);
20752
20753 // Get the inputs.
20754 SDValue Chain = Op.getOperand(0);
20755 SDValue Size = Op.getOperand(1);
20756
20758 "no-stack-arg-probe")) {
20760 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20761 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20762 Chain = SP.getValue(1);
20763 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20764 if (Align)
20765 SP =
20766 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20767 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20768 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20769 SDValue Ops[2] = { SP, Chain };
20770 return DAG.getMergeValues(Ops, DL);
20771 }
20772
20773 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20774 DAG.getConstant(2, DL, MVT::i32));
20775
20776 SDValue Glue;
20777 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20778 Glue = Chain.getValue(1);
20779
20780 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20781 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20782
20783 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20784 Chain = NewSP.getValue(1);
20785
20786 SDValue Ops[2] = { NewSP, Chain };
20787 return DAG.getMergeValues(Ops, DL);
20788}
20789
20790SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20791 bool IsStrict = Op->isStrictFPOpcode();
20792 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20793 const unsigned DstSz = Op.getValueType().getSizeInBits();
20794 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20795 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20796 "Unexpected type for custom-lowering FP_EXTEND");
20797
20798 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20799 "With both FP DP and 16, any FP conversion is legal!");
20800
20801 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20802 "With FP16, 16 to 32 conversion is legal!");
20803
20804 // Converting from 32 -> 64 is valid if we have FP64.
20805 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20806 // FIXME: Remove this when we have strict fp instruction selection patterns
20807 if (IsStrict) {
20808 SDLoc Loc(Op);
20810 Loc, Op.getValueType(), SrcVal);
20811 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20812 }
20813 return Op;
20814 }
20815
20816 // Either we are converting from 16 -> 64 without FP16 and/or
20817 // double-precision FP (or without Armv8-FP), so we must do it in two
20818 // steps;
20819 // or we are converting from 32 -> 64 without double-precision FP, or from
20820 // 16 -> 32 without FP16, so we must make a libcall.
20821 SDLoc Loc(Op);
20822 RTLIB::Libcall LC;
20823 MakeLibCallOptions CallOptions;
20824 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20825 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20826 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20827 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20828 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20829 if (Supported) {
20830 if (IsStrict) {
20831 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20832 {DstVT, MVT::Other}, {Chain, SrcVal});
20833 Chain = SrcVal.getValue(1);
20834 } else {
20835 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20836 }
20837 } else {
20838 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20839 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20840 "Unexpected type for custom-lowering FP_EXTEND");
20841 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20842 Loc, Chain);
20843 }
20844 }
20845
20846 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20847}
20848
20849SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20850 bool IsStrict = Op->isStrictFPOpcode();
20851
20852 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20853 EVT SrcVT = SrcVal.getValueType();
20854 EVT DstVT = Op.getValueType();
20855 const unsigned DstSz = Op.getValueType().getSizeInBits();
20856 const unsigned SrcSz = SrcVT.getSizeInBits();
20857 (void)DstSz;
20858 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20859 "Unexpected type for custom-lowering FP_ROUND");
20860
20861 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20862 "With both FP DP and 16, any FP conversion is legal!");
20863
20864 SDLoc Loc(Op);
20865
20866 // A single instruction handles 32 -> 16 when the subtarget has FP16.
20867 if (SrcSz == 32 && Subtarget->hasFP16())
20868 return Op;
20869
20870 // Lib call from 32 -> 16 / 64 -> [32, 16]
20871 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20872 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20873 "Unexpected type for custom-lowering FP_ROUND");
20874 MakeLibCallOptions CallOptions;
20875 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20876 SDValue Result;
20877 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20878 Loc, Chain);
20879 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20880}
20881
20882bool
20883 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20884 // The ARM target isn't yet aware of offsets.
20885 return false;
20886}
20887
20888 bool ARM::isBitFieldInvertedMask(unsigned v) {
20889 if (v == 0xffffffff)
20890 return false;
20891
20892 // there can be 1's on either or both "outsides", all the "inside"
20893 // bits must be 0's
20894 return isShiftedMask_32(~v);
20895}
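// --- Editorial sketch, not part of the upstream file ---
// "Inverted bit-field mask" means the zero bits form a single contiguous run
// with ones on the outside, which is exactly the shape BFC/BFI can clear or
// insert. For example 0xFFFF00FF qualifies (~v = 0x0000FF00 is one run of
// ones) while 0xFF00FF00 does not (~v = 0x00FF00FF has two runs). A
// standalone version of the shifted-mask test used above:
static bool exampleIsShiftedMask32(unsigned x) {
  if (x == 0)
    return false;
  unsigned filled = x | (x - 1);       // fill the trailing zeros
  return (filled & (filled + 1)) == 0; // must now be of the form 2^k - 1
}
// exampleIsShiftedMask32(~0xFFFF00FFu) -> true
// exampleIsShiftedMask32(~0xFF00FF00u) -> false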
20896
20897/// isFPImmLegal - Returns true if the target can instruction select the
20898/// specified FP immediate natively. If false, the legalizer will
20899/// materialize the FP immediate as a load from a constant pool.
20900 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20901 bool ForCodeSize) const {
20902 if (!Subtarget->hasVFP3Base())
20903 return false;
20904 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20905 return ARM_AM::getFP16Imm(Imm) != -1;
20906 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20907 ARM_AM::getFP32FP16Imm(Imm) != -1)
20908 return true;
20909 if (VT == MVT::f32)
20910 return ARM_AM::getFP32Imm(Imm) != -1;
20911 if (VT == MVT::f64 && Subtarget->hasFP64())
20912 return ARM_AM::getFP64Imm(Imm) != -1;
20913 return false;
20914}
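// --- Editorial sketch, not part of the upstream file ---
// VMOV (immediate) can only materialize floating-point constants of the form
//   +/- (16 + m) / 16 * 2^e   with m in [0, 15] and e in [-3, 4],
// i.e. an 8-bit sign/exponent/fraction encoding. Values such as 1.0, -0.5,
// 2.5 and 31.0 are representable; 0.0, 0.1 and 1e10 are not and end up in a
// constant pool instead. A standalone check in the same spirit as getFP64Imm:
#include <cmath> // editorial only: for std::ldexp / std::fabs / std::isfinite
static bool exampleVfpImmEncodable(double d) {
  if (d == 0.0 || !std::isfinite(d))
    return false;
  double mag = std::fabs(d);
  for (int e = -3; e <= 4; ++e)
    for (int m = 0; m <= 15; ++m)
      if (mag == std::ldexp((16.0 + m) / 16.0, e))
        return true;
  return false;
}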
20915
20916/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20917/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20918/// specified in the intrinsic calls.
20919 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20920 const CallInst &I,
20921 MachineFunction &MF,
20922 unsigned Intrinsic) const {
20923 switch (Intrinsic) {
20924 case Intrinsic::arm_neon_vld1:
20925 case Intrinsic::arm_neon_vld2:
20926 case Intrinsic::arm_neon_vld3:
20927 case Intrinsic::arm_neon_vld4:
20928 case Intrinsic::arm_neon_vld2lane:
20929 case Intrinsic::arm_neon_vld3lane:
20930 case Intrinsic::arm_neon_vld4lane:
20931 case Intrinsic::arm_neon_vld2dup:
20932 case Intrinsic::arm_neon_vld3dup:
20933 case Intrinsic::arm_neon_vld4dup: {
20935 // Conservatively set memVT to the entire set of vectors loaded.
20936 auto &DL = I.getDataLayout();
20937 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20938 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20939 Info.ptrVal = I.getArgOperand(0);
20940 Info.offset = 0;
20941 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20942 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20943 // volatile loads with NEON intrinsics not supported
20945 return true;
20946 }
20947 case Intrinsic::arm_neon_vld1x2:
20948 case Intrinsic::arm_neon_vld1x3:
20949 case Intrinsic::arm_neon_vld1x4: {
20951 // Conservatively set memVT to the entire set of vectors loaded.
20952 auto &DL = I.getDataLayout();
20953 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20954 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20955 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20956 Info.offset = 0;
20957 Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
20958 // volatile loads with NEON intrinsics not supported
20960 return true;
20961 }
20962 case Intrinsic::arm_neon_vst1:
20963 case Intrinsic::arm_neon_vst2:
20964 case Intrinsic::arm_neon_vst3:
20965 case Intrinsic::arm_neon_vst4:
20966 case Intrinsic::arm_neon_vst2lane:
20967 case Intrinsic::arm_neon_vst3lane:
20968 case Intrinsic::arm_neon_vst4lane: {
20970 // Conservatively set memVT to the entire set of vectors stored.
20971 auto &DL = I.getDataLayout();
20972 unsigned NumElts = 0;
20973 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20974 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20975 if (!ArgTy->isVectorTy())
20976 break;
20977 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20978 }
20979 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20980 Info.ptrVal = I.getArgOperand(0);
20981 Info.offset = 0;
20982 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20983 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20984 // volatile stores with NEON intrinsics not supported
20986 return true;
20987 }
20988 case Intrinsic::arm_neon_vst1x2:
20989 case Intrinsic::arm_neon_vst1x3:
20990 case Intrinsic::arm_neon_vst1x4: {
20992 // Conservatively set memVT to the entire set of vectors stored.
20993 auto &DL = I.getDataLayout();
20994 unsigned NumElts = 0;
20995 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20996 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20997 if (!ArgTy->isVectorTy())
20998 break;
20999 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21000 }
21001 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21002 Info.ptrVal = I.getArgOperand(0);
21003 Info.offset = 0;
21004 Info.align = I.getParamAlign(0).valueOrOne();
21005 // volatile stores with NEON intrinsics not supported
21007 return true;
21008 }
21009 case Intrinsic::arm_mve_vld2q:
21010 case Intrinsic::arm_mve_vld4q: {
21012 // Conservatively set memVT to the entire set of vectors loaded.
21013 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21014 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21015 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21016 Info.ptrVal = I.getArgOperand(0);
21017 Info.offset = 0;
21018 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21019 // volatile loads with MVE intrinsics not supported
21021 return true;
21022 }
21023 case Intrinsic::arm_mve_vst2q:
21024 case Intrinsic::arm_mve_vst4q: {
21026 // Conservatively set memVT to the entire set of vectors stored.
21027 Type *VecTy = I.getArgOperand(1)->getType();
21028 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21029 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21030 Info.ptrVal = I.getArgOperand(0);
21031 Info.offset = 0;
21032 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21033 // volatile stores with MVE intrinsics not supported
21035 return true;
21036 }
21037 case Intrinsic::arm_mve_vldr_gather_base:
21038 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21040 Info.ptrVal = nullptr;
21041 Info.memVT = MVT::getVT(I.getType());
21042 Info.align = Align(1);
21044 return true;
21045 }
21046 case Intrinsic::arm_mve_vldr_gather_base_wb:
21047 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21049 Info.ptrVal = nullptr;
21050 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21051 Info.align = Align(1);
21053 return true;
21054 }
21055 case Intrinsic::arm_mve_vldr_gather_offset:
21056 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21058 Info.ptrVal = nullptr;
21059 MVT DataVT = MVT::getVT(I.getType());
21060 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21061 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21062 DataVT.getVectorNumElements());
21063 Info.align = Align(1);
21065 return true;
21066 }
21067 case Intrinsic::arm_mve_vstr_scatter_base:
21068 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21070 Info.ptrVal = nullptr;
21071 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21072 Info.align = Align(1);
21074 return true;
21075 }
21076 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21077 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21079 Info.ptrVal = nullptr;
21080 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21081 Info.align = Align(1);
21083 return true;
21084 }
21085 case Intrinsic::arm_mve_vstr_scatter_offset:
21086 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21088 Info.ptrVal = nullptr;
21089 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21090 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21091 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21092 DataVT.getVectorNumElements());
21093 Info.align = Align(1);
21095 return true;
21096 }
21097 case Intrinsic::arm_ldaex:
21098 case Intrinsic::arm_ldrex: {
21099 auto &DL = I.getDataLayout();
21100 Type *ValTy = I.getParamElementType(0);
21102 Info.memVT = MVT::getVT(ValTy);
21103 Info.ptrVal = I.getArgOperand(0);
21104 Info.offset = 0;
21105 Info.align = DL.getABITypeAlign(ValTy);
21107 return true;
21108 }
21109 case Intrinsic::arm_stlex:
21110 case Intrinsic::arm_strex: {
21111 auto &DL = I.getDataLayout();
21112 Type *ValTy = I.getParamElementType(1);
21114 Info.memVT = MVT::getVT(ValTy);
21115 Info.ptrVal = I.getArgOperand(1);
21116 Info.offset = 0;
21117 Info.align = DL.getABITypeAlign(ValTy);
21119 return true;
21120 }
21121 case Intrinsic::arm_stlexd:
21122 case Intrinsic::arm_strexd:
21124 Info.memVT = MVT::i64;
21125 Info.ptrVal = I.getArgOperand(2);
21126 Info.offset = 0;
21127 Info.align = Align(8);
21129 return true;
21130
21131 case Intrinsic::arm_ldaexd:
21132 case Intrinsic::arm_ldrexd:
21134 Info.memVT = MVT::i64;
21135 Info.ptrVal = I.getArgOperand(0);
21136 Info.offset = 0;
21137 Info.align = Align(8);
21139 return true;
21140
21141 default:
21142 break;
21143 }
21144
21145 return false;
21146}
21147
21148/// Returns true if it is beneficial to convert a load of a constant
21149/// to just the constant itself.
21150bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21151 Type *Ty) const {
21152 assert(Ty->isIntegerTy());
21153
21154 unsigned Bits = Ty->getPrimitiveSizeInBits();
21155 if (Bits == 0 || Bits > 32)
21156 return false;
21157 return true;
21158}
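// In other words, any integer constant of 32 bits or fewer is reported as
// cheap to rematerialize: on ARMv6T2+ it is at most a movw/movt pair, e.g.
// i32 0x12345678 -> "movw r0, #0x5678; movt r0, #0x1234", which this hook
// prefers over keeping the value in a load.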
21159
21161 unsigned Index) const {
21163 return false;
21164
21165 return (Index == 0 || Index == ResVT.getVectorNumElements());
21166}
21167
21169 ARM_MB::MemBOpt Domain) const {
21170 // First, if the target has no DMB, see what fallback we can use.
21171 if (!Subtarget->hasDataBarrier()) {
21172 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21173 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21174 // here.
21175 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21176 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21177 Builder.getInt32(0), Builder.getInt32(7),
21178 Builder.getInt32(10), Builder.getInt32(5)};
21179 return Builder.CreateIntrinsic(Intrinsic::arm_mcr, {}, args);
21180 } else {
21181 // Instead of using barriers, atomic accesses on these subtargets use
21182 // libcalls.
21183 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21184 }
21185 } else {
21186 // Only a full system barrier exists in the M-class architectures.
21187 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21188 Constant *CDomain = Builder.getInt32(Domain);
21189 return Builder.CreateIntrinsic(Intrinsic::arm_dmb, {}, CDomain);
21190 }
21191}
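// Roughly speaking, the two paths above correspond to:
//   - targets with DMB:  "dmb ish" / "dmb ishst" (forced to "dmb sy" on
//                        M-class, which only has the full-system option),
//   - ARMv6 ARM mode:    the CP15 barrier "mcr p15, #0, rX, c7, c10, #5".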
21192
21193// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21195 Instruction *Inst,
21196 AtomicOrdering Ord) const {
21197 switch (Ord) {
21200 llvm_unreachable("Invalid fence: unordered/non-atomic");
21203 return nullptr; // Nothing to do
21205 if (!Inst->hasAtomicStore())
21206 return nullptr; // Nothing to do
21207 [[fallthrough]];
21210 if (Subtarget->preferISHSTBarriers())
21211 return makeDMB(Builder, ARM_MB::ISHST);
21212 // FIXME: add a comment with a link to documentation justifying this.
21213 else
21214 return makeDMB(Builder, ARM_MB::ISH);
21215 }
21216 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21217}
21218
21220 Instruction *Inst,
21221 AtomicOrdering Ord) const {
21222 switch (Ord) {
21225 llvm_unreachable("Invalid fence: unordered/not-atomic");
21228 return nullptr; // Nothing to do
21232 return makeDMB(Builder, ARM_MB::ISH);
21233 }
21234 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21235}
21236
21237// Loads and stores less than 64-bits are already atomic; ones above that
21238// are doomed anyway, so defer to the default libcall and blame the OS when
21239// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21240// anything for those.
21243 bool has64BitAtomicStore;
21244 if (Subtarget->isMClass())
21245 has64BitAtomicStore = false;
21246 else if (Subtarget->isThumb())
21247 has64BitAtomicStore = Subtarget->hasV7Ops();
21248 else
21249 has64BitAtomicStore = Subtarget->hasV6Ops();
21250
21251 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21252 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21253 : AtomicExpansionKind::None;
21254}
21255
21256// Loads and stores less than 64-bits are already atomic; ones above that
21257// are doomed anyway, so defer to the default libcall and blame the OS when
21258// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21259// anything for those.
21260// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21261// guarantee, see DDI0406C ARM architecture reference manual,
21262// sections A8.8.72-74 LDRD)
21265 bool has64BitAtomicLoad;
21266 if (Subtarget->isMClass())
21267 has64BitAtomicLoad = false;
21268 else if (Subtarget->isThumb())
21269 has64BitAtomicLoad = Subtarget->hasV7Ops();
21270 else
21271 has64BitAtomicLoad = Subtarget->hasV6Ops();
21272
21273 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21274 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21275 : AtomicExpansionKind::None;
21276}
21277
21278// For the real atomic operations, we have ldrex/strex up to 32 bits,
21279// and up to 64 bits on the non-M profiles
21282 if (AI->isFloatingPointOperation())
21283 return AtomicExpansionKind::CmpXChg;
21284
21285 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21286 bool hasAtomicRMW;
21287 if (Subtarget->isMClass())
21288 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21289 else if (Subtarget->isThumb())
21290 hasAtomicRMW = Subtarget->hasV7Ops();
21291 else
21292 hasAtomicRMW = Subtarget->hasV6Ops();
21293 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21294 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21295 // implement atomicrmw without spilling. If the target address is also on
21296 // the stack and close enough to the spill slot, this can lead to a
21297 // situation where the monitor always gets cleared and the atomic operation
21298 // can never succeed. So at -O0 lower this operation to a CAS loop.
21299 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21300 return AtomicExpansionKind::CmpXChg;
21301 return AtomicExpansionKind::LLSC;
21302 }
21303 return AtomicExpansionKind::None;
21304}
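// As an illustration of the LLSC choice above (sketch only, the loop itself
// is produced later by atomic expansion + selection): an "atomicrmw add i32"
// at -O1 or higher on an ARMv7 target ends up as a loop along the lines of
//   1: ldrex   r1, [r0]
//      add     r1, r1, r2
//      strex   r3, r1, [r0]
//      cmp     r3, #0
//      bne     1b
// while at -O0 the operation is routed through a cmpxchg-style expansion
// instead, for the reason explained in the comment above.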
21305
21306// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21307// bits, and up to 64 bits on the non-M profiles.
21310 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21311 // implement cmpxchg without spilling. If the address being exchanged is also
21312 // on the stack and close enough to the spill slot, this can lead to a
21313 // situation where the monitor always gets cleared and the atomic operation
21314 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21315 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21316 bool HasAtomicCmpXchg;
21317 if (Subtarget->isMClass())
21318 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21319 else if (Subtarget->isThumb())
21320 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21321 else
21322 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21323 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21324 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21325 return AtomicExpansionKind::LLSC;
21326 return AtomicExpansionKind::None;
21327}
21328
21330 const Instruction *I) const {
21331 return InsertFencesForAtomic;
21332}
21333
21335 // ROPI/RWPI are not supported currently.
21336 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21337}
21338
21340 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21342
21343 // MSVC CRT has a global variable holding security cookie.
21344 M.getOrInsertGlobal("__security_cookie",
21345 PointerType::getUnqual(M.getContext()));
21346
21347 // MSVC CRT has a function to validate security cookie.
21348 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21349 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21350 PointerType::getUnqual(M.getContext()));
21351 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21352 F->addParamAttr(0, Attribute::AttrKind::InReg);
21353}
21354
21356 // MSVC CRT has a global variable holding security cookie.
21357 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21358 return M.getGlobalVariable("__security_cookie");
21360}
21361
21363 // MSVC CRT has a function to validate security cookie.
21364 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21365 return M.getFunction("__security_check_cookie");
21367}
21368
21370 unsigned &Cost) const {
21371 // If we do not have NEON, vector types are not natively supported.
21372 if (!Subtarget->hasNEON())
21373 return false;
21374
21375 // Floating point values and vector values map to the same register file.
21376 // Therefore, although we could do a store extract of a vector type, this is
21377 // better to leave at float as we have more freedom in the addressing mode for
21378 // those.
21379 if (VectorTy->isFPOrFPVectorTy())
21380 return false;
21381
21382 // If the index is unknown at compile time, this is very expensive to lower
21383 // and it is not possible to combine the store with the extract.
21384 if (!isa<ConstantInt>(Idx))
21385 return false;
21386
21387 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21388 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21389 // We can do a store + vector extract on any vector that fits perfectly in a D
21390 // or Q register.
21391 if (BitWidth == 64 || BitWidth == 128) {
21392 Cost = 0;
21393 return true;
21394 }
21395 return false;
21396}
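// For example, extracting constant lane 1 of a <4 x i32> for a store can
// typically be folded into a single lane store ("vst1.32 {d0[1]}, [r0]"
// style), which is why the cost above is 0; a non-constant index, a
// floating-point vector, or a vector that does not exactly fill a D or Q
// register (e.g. <3 x i32>) falls back to the generic lowering.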
21397
21399 return Subtarget->hasV6T2Ops();
21400}
21401
21403 return Subtarget->hasV6T2Ops();
21404}
21405
21407 const Instruction &AndI) const {
21408 if (!Subtarget->hasV7Ops())
21409 return false;
21410
21411 // Sink the `and` instruction only if the mask would fit into a modified
21412 // immediate operand.
21413 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21414 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21415 return false;
21416 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21417 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21418 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21419}
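// For example, a mask of 0xff or 0xff0 fits the modified-immediate forms
// checked above, so the and/compare-with-zero pair can later be folded into
// a single "tst" with an immediate; a mask such as 0x12345678 has no such
// encoding and the hook returns false.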
21420
21423 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21424 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21427 ExpansionFactor);
21428}
21429
21431 Value *Addr,
21432 AtomicOrdering Ord) const {
21433 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21434 bool IsAcquire = isAcquireOrStronger(Ord);
21435
21436 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21437 // intrinsic must return {i32, i32} and we have to recombine them into a
21438 // single i64 here.
21439 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21440 Intrinsic::ID Int =
21441 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21442
21443 Value *LoHi =
21444 Builder.CreateIntrinsic(Int, {}, Addr, /*FMFSource=*/nullptr, "lohi");
21445
21446 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21447 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21448 if (!Subtarget->isLittle())
21449 std::swap (Lo, Hi);
21450 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21451 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21452 return Builder.CreateOr(
21453 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21454 }
21455
21456 Type *Tys[] = { Addr->getType() };
21457 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21458 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
21459
21460 CI->addParamAttr(
21461 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21462 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21463}
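// For the 64-bit case above, the emitted IR looks roughly like this (value
// names are illustrative only):
//   %lohi  = call { i32, i32 } @llvm.arm.ldrexd(ptr %addr)
//   %lo    = extractvalue { i32, i32 } %lohi, 0
//   %hi    = extractvalue { i32, i32 } %lohi, 1
//   %lo64  = zext i32 %lo to i64
//   %hi64  = zext i32 %hi to i64
//   %shl   = shl i64 %hi64, 32
//   %val64 = or i64 %lo64, %shl
// which later selects to a single LDREXD/LDAEXD of a register pair.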
21464
21466 IRBuilderBase &Builder) const {
21467 if (!Subtarget->hasV7Ops())
21468 return;
21469 Builder.CreateIntrinsic(Intrinsic::arm_clrex, {}, {});
21470}
21471
21473 Value *Val, Value *Addr,
21474 AtomicOrdering Ord) const {
21475 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21476 bool IsRelease = isReleaseOrStronger(Ord);
21477
21478 // Since the intrinsics must have legal type, the i64 intrinsics take two
21479 // parameters: "i32, i32". We must marshal Val into the appropriate form
21480 // before the call.
21481 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21482 Intrinsic::ID Int =
21483 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21484 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21485
21486 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21487 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21488 if (!Subtarget->isLittle())
21489 std::swap(Lo, Hi);
21490 return Builder.CreateIntrinsic(Int, {}, {Lo, Hi, Addr});
21491 }
21492
21493 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21494 Type *Tys[] = { Addr->getType() };
21496
21497 CallInst *CI = Builder.CreateCall(
21498 Strex, {Builder.CreateZExtOrBitCast(
21499 Val, Strex->getFunctionType()->getParamType(0)),
21500 Addr});
21501 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21502 Val->getType()));
21503 return CI;
21504}
21505
21506
21508 return Subtarget->isMClass();
21509}
21510
21511/// A helper function for determining the number of interleaved accesses we
21512/// will generate when lowering accesses of the given type.
21513unsigned
21515 const DataLayout &DL) const {
21516 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21517}
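// i.e. one access per 128 bits, rounded up: a 64-bit <8 x i8> or a 128-bit
// <4 x i32> needs a single vldN/vstN, a 256-bit <8 x i32> is split into two,
// and a 512-bit <16 x i32> into four.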
21518
21520 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21521 const DataLayout &DL) const {
21522
21523 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21524 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21525
21526 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21527 return false;
21528
21529 // Ensure the vector doesn't have f16 elements. Even though we could do an
21530 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21531 // f32.
21532 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21533 return false;
21534 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21535 return false;
21536
21537 // Ensure the number of vector elements is greater than 1.
21538 if (VecTy->getNumElements() < 2)
21539 return false;
21540
21541 // Ensure the element type is legal.
21542 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21543 return false;
21544 // And, under MVE, the alignment must be at least the element size.
21545 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21546 return false;
21547
21548 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21549 // 128 will be split into multiple interleaved accesses.
21550 if (Subtarget->hasNEON() && VecSize == 64)
21551 return true;
21552 return VecSize % 128 == 0;
21553}
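// A few concrete cases of the rules above: a factor-2 interleave of
// <4 x i32> sub-vectors (128 bits each) is accepted on both NEON and MVE;
// factor 3 is NEON-only; <8 x half> sub-vectors are rejected on NEON (no
// useful f16 vldN path); and a 64-bit total size is accepted only on NEON.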
21554
21556 if (Subtarget->hasNEON())
21557 return 4;
21558 if (Subtarget->hasMVEIntegerOps())
21559 return MVEMaxSupportedInterleaveFactor;
21560 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21561}
21562
21563/// Lower an interleaved load into a vldN intrinsic.
21564///
21565/// E.g. Lower an interleaved load (Factor = 2):
21566/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21567/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21568/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21569///
21570/// Into:
21571/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21572/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21573/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21576 ArrayRef<unsigned> Indices, unsigned Factor) const {
21577 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21578 "Invalid interleave factor");
21579 assert(!Shuffles.empty() && "Empty shufflevector input");
21580 assert(Shuffles.size() == Indices.size() &&
21581 "Unmatched number of shufflevectors and indices");
21582
21583 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21584 Type *EltTy = VecTy->getElementType();
21585
21586 const DataLayout &DL = LI->getDataLayout();
21587 Align Alignment = LI->getAlign();
21588
21589 // Skip if we do not have NEON and skip illegal vector types. We can
21590 // "legalize" wide vector types into multiple interleaved accesses as long as
21591 // the vector types are divisible by 128.
21592 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21593 return false;
21594
21595 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21596
21597 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21598 // load integer vectors first and then convert to pointer vectors.
21599 if (EltTy->isPointerTy())
21600 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21601
21602 IRBuilder<> Builder(LI);
21603
21604 // The base address of the load.
21605 Value *BaseAddr = LI->getPointerOperand();
21606
21607 if (NumLoads > 1) {
21608 // If we're going to generate more than one load, reset the sub-vector type
21609 // to something legal.
21610 VecTy = FixedVectorType::get(VecTy->getElementType(),
21611 VecTy->getNumElements() / NumLoads);
21612 }
21613
21614 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21615
21616 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21617 if (Subtarget->hasNEON()) {
21618 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21619 Type *Tys[] = {VecTy, PtrTy};
21620 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21621 Intrinsic::arm_neon_vld3,
21622 Intrinsic::arm_neon_vld4};
21623
21625 Ops.push_back(BaseAddr);
21626 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21627
21628 return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,
21629 /*FMFSource=*/nullptr, "vldN");
21630 } else {
21631 assert((Factor == 2 || Factor == 4) &&
21632 "expected interleave factor of 2 or 4 for MVE");
21633 Intrinsic::ID LoadInts =
21634 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21635 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21636 Type *Tys[] = {VecTy, PtrTy};
21637
21639 Ops.push_back(BaseAddr);
21640 return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,
21641 "vldN");
21642 }
21643 };
21644
21645 // Holds sub-vectors extracted from the load intrinsic return values. The
21646 // sub-vectors are associated with the shufflevector instructions they will
21647 // replace.
21649
21650 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21651 // If we're generating more than one load, compute the base address of
21652 // subsequent loads as an offset from the previous.
21653 if (LoadCount > 0)
21654 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21655 VecTy->getNumElements() * Factor);
21656
21657 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21658
21659 // Replace uses of each shufflevector with the corresponding vector loaded
21660 // by ldN.
21661 for (unsigned i = 0; i < Shuffles.size(); i++) {
21662 ShuffleVectorInst *SV = Shuffles[i];
21663 unsigned Index = Indices[i];
21664
21665 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21666
21667 // Convert the integer vector to pointer vector if the element is pointer.
21668 if (EltTy->isPointerTy())
21669 SubVec = Builder.CreateIntToPtr(
21670 SubVec,
21672
21673 SubVecs[SV].push_back(SubVec);
21674 }
21675 }
21676
21677 // Replace uses of the shufflevector instructions with the sub-vectors
21678 // returned by the load intrinsic. If a shufflevector instruction is
21679 // associated with more than one sub-vector, those sub-vectors will be
21680 // concatenated into a single wide vector.
21681 for (ShuffleVectorInst *SVI : Shuffles) {
21682 auto &SubVec = SubVecs[SVI];
21683 auto *WideVec =
21684 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21685 SVI->replaceAllUsesWith(WideVec);
21686 }
21687
21688 return true;
21689}
21690
21691/// Lower an interleaved store into a vstN intrinsic.
21692///
21693/// E.g. Lower an interleaved store (Factor = 3):
21694/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21695/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21696/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21697///
21698/// Into:
21699/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21700/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21701/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21702/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21703///
21704/// Note that the new shufflevectors will be removed and we'll only generate one
21705/// vst3 instruction in CodeGen.
21706///
21707/// Example for a more general valid mask (Factor 3). Lower:
21708/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21709/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21710/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21711///
21712/// Into:
21713/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21714/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21715/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21716/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21718 ShuffleVectorInst *SVI,
21719 unsigned Factor) const {
21720 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21721 "Invalid interleave factor");
21722
21723 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21724 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21725
21726 unsigned LaneLen = VecTy->getNumElements() / Factor;
21727 Type *EltTy = VecTy->getElementType();
21728 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21729
21730 const DataLayout &DL = SI->getDataLayout();
21731 Align Alignment = SI->getAlign();
21732
21733 // Skip if we do not have NEON and skip illegal vector types. We can
21734 // "legalize" wide vector types into multiple interleaved accesses as long as
21735 // the vector types are divisible by 128.
21736 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21737 return false;
21738
21739 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21740
21741 Value *Op0 = SVI->getOperand(0);
21742 Value *Op1 = SVI->getOperand(1);
21743 IRBuilder<> Builder(SI);
21744
21745 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21746 // vectors to integer vectors.
21747 if (EltTy->isPointerTy()) {
21748 Type *IntTy = DL.getIntPtrType(EltTy);
21749
21750 // Convert to the corresponding integer vector.
21751 auto *IntVecTy =
21752 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21753 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21754 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21755
21756 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21757 }
21758
21759 // The base address of the store.
21760 Value *BaseAddr = SI->getPointerOperand();
21761
21762 if (NumStores > 1) {
21763 // If we're going to generate more than one store, reset the lane length
21764 // and sub-vector type to something legal.
21765 LaneLen /= NumStores;
21766 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21767 }
21768
21769 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21770
21771 auto Mask = SVI->getShuffleMask();
21772
21773 auto createStoreIntrinsic = [&](Value *BaseAddr,
21774 SmallVectorImpl<Value *> &Shuffles) {
21775 if (Subtarget->hasNEON()) {
21776 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21777 Intrinsic::arm_neon_vst3,
21778 Intrinsic::arm_neon_vst4};
21779 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21780 Type *Tys[] = {PtrTy, SubVecTy};
21781
21783 Ops.push_back(BaseAddr);
21784 append_range(Ops, Shuffles);
21785 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21786 Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);
21787 } else {
21788 assert((Factor == 2 || Factor == 4) &&
21789 "expected interleave factor of 2 or 4 for MVE");
21790 Intrinsic::ID StoreInts =
21791 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21792 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21793 Type *Tys[] = {PtrTy, SubVecTy};
21794
21796 Ops.push_back(BaseAddr);
21797 append_range(Ops, Shuffles);
21798 for (unsigned F = 0; F < Factor; F++) {
21799 Ops.push_back(Builder.getInt32(F));
21800 Builder.CreateIntrinsic(StoreInts, Tys, Ops);
21801 Ops.pop_back();
21802 }
21803 }
21804 };
21805
21806 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21807 // If we're generating more than one store, we compute the base address of
21808 // subsequent stores as an offset from the previous.
21809 if (StoreCount > 0)
21810 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21811 BaseAddr, LaneLen * Factor);
21812
21813 SmallVector<Value *, 4> Shuffles;
21814
21815 // Split the shufflevector operands into sub vectors for the new vstN call.
21816 for (unsigned i = 0; i < Factor; i++) {
21817 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21818 if (Mask[IdxI] >= 0) {
21819 Shuffles.push_back(Builder.CreateShuffleVector(
21820 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21821 } else {
21822 unsigned StartMask = 0;
21823 for (unsigned j = 1; j < LaneLen; j++) {
21824 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21825 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21826 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21827 break;
21828 }
21829 }
21830 // Note: If all elements in a chunk are undefs, StartMask=0!
21831 // Note: Filling undef gaps with random elements is ok, since
21832 // those elements were being written anyway (with undefs).
21833 // In the case of all undefs we're defaulting to using elems from 0
21834 // Note: StartMask cannot be negative, it's checked in
21835 // isReInterleaveMask
21836 Shuffles.push_back(Builder.CreateShuffleVector(
21837 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21838 }
21839 }
21840
21841 createStoreIntrinsic(BaseAddr, Shuffles);
21842 }
21843 return true;
21844}
21845
21846enum HABaseType {
21847 HA_UNKNOWN = 0,
21848 HA_FLOAT,
21849 HA_DOUBLE,
21850 HA_VECT64,
21851 HA_VECT128
21852};
21853
21854static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21855 uint64_t &Members) {
21856 if (auto *ST = dyn_cast<StructType>(Ty)) {
21857 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21858 uint64_t SubMembers = 0;
21859 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21860 return false;
21861 Members += SubMembers;
21862 }
21863 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21864 uint64_t SubMembers = 0;
21865 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21866 return false;
21867 Members += SubMembers * AT->getNumElements();
21868 } else if (Ty->isFloatTy()) {
21869 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21870 return false;
21871 Members = 1;
21872 Base = HA_FLOAT;
21873 } else if (Ty->isDoubleTy()) {
21874 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21875 return false;
21876 Members = 1;
21877 Base = HA_DOUBLE;
21878 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21879 Members = 1;
21880 switch (Base) {
21881 case HA_FLOAT:
21882 case HA_DOUBLE:
21883 return false;
21884 case HA_VECT64:
21885 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21886 case HA_VECT128:
21887 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21888 case HA_UNKNOWN:
21889 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21890 case 64:
21891 Base = HA_VECT64;
21892 return true;
21893 case 128:
21894 Base = HA_VECT128;
21895 return true;
21896 default:
21897 return false;
21898 }
21899 }
21900 }
21901
21902 return (Members > 0 && Members <= 4);
21903}
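// Examples of what the recursion above accepts (described with C-like types
// for illustration):
//   struct { float x, y, z; }          -> Base = HA_FLOAT,   Members = 3: HA
//   struct { double d[4]; }            -> Base = HA_DOUBLE,  Members = 4: HA
//   two 128-bit <4 x float> fields     -> Base = HA_VECT128, Members = 2: HA
//   struct { float f; double d; }      -> mixed base types, not an HA
//   struct { float f[5]; }             -> five members (> 4), not an HA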
21904
21905/// Return the correct alignment for the current calling convention.
21907 Type *ArgTy, const DataLayout &DL) const {
21908 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21909 if (!ArgTy->isVectorTy())
21910 return ABITypeAlign;
21911
21912 // Avoid over-aligning vector parameters. It would require realigning the
21913 // stack and waste space for no real benefit.
21914 MaybeAlign StackAlign = DL.getStackAlignment();
21915 assert(StackAlign && "data layout string is missing stack alignment");
21916 return std::min(ABITypeAlign, *StackAlign);
21917}
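// For instance, a <4 x i32> argument has a 16-byte ABI type alignment, but
// with the usual AAPCS data layout (8-byte stack alignment) it is passed
// with at most 8-byte alignment here, so the stack never needs realigning
// just for argument passing.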
21918
21919/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21920/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21921/// passing according to AAPCS rules.
21923 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21924 const DataLayout &DL) const {
21925 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21926 CallingConv::ARM_AAPCS_VFP)
21927 return false;
21928
21929 HABaseType Base = HA_UNKNOWN;
21930 uint64_t Members = 0;
21931 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21932 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21933
21934 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21935 return IsHA || IsIntArray;
21936}
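// For example, under AAPCS-VFP a struct of four floats (an HA with
// Base = HA_FLOAT, Members = 4) and a [4 x i32] array both return true, so
// the calling-convention code places their pieces in consecutive registers
// or spills the whole aggregate, rather than splitting it arbitrarily.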
21937
21939 const Constant *PersonalityFn) const {
21940 // Platforms which do not use SjLj EH may return values in these registers
21941 // via the personality function.
21942 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
21943}
21944
21946 const Constant *PersonalityFn) const {
21947 // Platforms which do not use SjLj EH may return values in these registers
21948 // via the personality function.
21949 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
21950}
21951
21952void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21953 // Update IsSplitCSR in ARMFunctionInfo.
21954 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21955 AFI->setIsSplitCSR(true);
21956}
21957
21958void ARMTargetLowering::insertCopiesSplitCSR(
21959 MachineBasicBlock *Entry,
21960 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21961 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21962 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21963 if (!IStart)
21964 return;
21965
21966 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21967 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21968 MachineBasicBlock::iterator MBBI = Entry->begin();
21969 for (const MCPhysReg *I = IStart; *I; ++I) {
21970 const TargetRegisterClass *RC = nullptr;
21971 if (ARM::GPRRegClass.contains(*I))
21972 RC = &ARM::GPRRegClass;
21973 else if (ARM::DPRRegClass.contains(*I))
21974 RC = &ARM::DPRRegClass;
21975 else
21976 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
21977
21978 Register NewVR = MRI->createVirtualRegister(RC);
21979 // Create copy from CSR to a virtual register.
21980 // FIXME: this currently does not emit CFI pseudo-instructions, it works
21981 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
21982 // nounwind. If we want to generalize this later, we may need to emit
21983 // CFI pseudo-instructions.
21984 assert(Entry->getParent()->getFunction().hasFnAttribute(
21985 Attribute::NoUnwind) &&
21986 "Function should be nounwind in insertCopiesSplitCSR!");
21987 Entry->addLiveIn(*I);
21988 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
21989 .addReg(*I);
21990
21991 // Insert the copy-back instructions right before the terminator.
21992 for (auto *Exit : Exits)
21993 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
21994 TII->get(TargetOpcode::COPY), *I)
21995 .addReg(NewVR);
21996 }
21997}
21998
22002}
22003
22005 return Subtarget->hasMVEIntegerOps();
22006}
22007
22010 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22011 if (!VTy)
22012 return false;
22013
22014 auto *ScalarTy = VTy->getScalarType();
22015 unsigned NumElements = VTy->getNumElements();
22016
22017 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22018 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22019 return false;
22020
22021 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22022 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22023 return Subtarget->hasMVEFloatOps();
22024
22026 return false;
22027
22028 return Subtarget->hasMVEIntegerOps() &&
22029 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22030 ScalarTy->isIntegerTy(32));
22031}
22032
22035 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22036 Value *Accumulator) const {
22037
22038 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22039
22040 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22041
22042 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22043
22044 if (TyWidth > 128) {
22045 int Stride = Ty->getNumElements() / 2;
22046 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22047 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22048 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22049 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22050
22051 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22052 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22053 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22054 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22055 Value *LowerSplitAcc = nullptr;
22056 Value *UpperSplitAcc = nullptr;
22057
22058 if (Accumulator) {
22059 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22060 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22061 }
22062
22063 auto *LowerSplitInt = createComplexDeinterleavingIR(
22064 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22065 auto *UpperSplitInt = createComplexDeinterleavingIR(
22066 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22067
22068 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22069 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22070 }
22071
22072 auto *IntTy = Type::getInt32Ty(B.getContext());
22073
22074 ConstantInt *ConstRotation = nullptr;
22075 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22076 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22077
22078 if (Accumulator)
22079 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22080 {ConstRotation, Accumulator, InputB, InputA});
22081 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22082 {ConstRotation, InputB, InputA});
22083 }
22084
22085 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22086 // 1 means the value is not halved.
22087 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22088
22090 ConstRotation = ConstantInt::get(IntTy, 0);
22092 ConstRotation = ConstantInt::get(IntTy, 1);
22093
22094 if (!ConstRotation)
22095 return nullptr; // Invalid rotation for arm_mve_vcaddq
22096
22097 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22098 {ConstHalving, ConstRotation, InputA, InputB});
22099 }
22100
22101 return nullptr;
22102}
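// As an illustration of the splitting logic above: a partial complex
// multiply on <16 x half> (256 bits) is first split into two <8 x half>
// halves, each half is lowered to an llvm.arm.mve.vcmulq / vcmlaq call with
// the rotation passed as an integer operand, and the two results are then
// concatenated back together with a shufflevector.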
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
#define MAKE_CASE(V)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
unsigned RegSize
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
static const LLT S1
static const LLT F64
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations...
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, SDValue V1)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V)
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
constexpr MVT FlagsVT
Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
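The rewrite itself is plain distribution. A hedged, standalone sketch of the DAG transformation, assuming an ISD::MUL whose left operand is a single-use ISD::ADD (the in-tree combine adds profitability checks so that the result actually benefits from multiply-accumulate forwarding):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch only: rewrite (A + B) * C into (A * C) + (B * C).
static SDValue distributeMulOverAdd(SDNode *N, SelectionDAG &DAG) {
  SDValue Add = N->getOperand(0);
  SDValue C = N->getOperand(1);
  if (Add.getOpcode() != ISD::ADD || !Add.hasOneUse())
    return SDValue();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue AC = DAG.getNode(ISD::MUL, DL, VT, Add.getOperand(0), C);
  SDValue BC = DAG.getNode(ISD::MUL, DL, VT, Add.getOperand(1), C);
  return DAG.getNode(ISD::ADD, DL, VT, AC, BC);
}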
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
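A minimal sketch of how such a walk can look, assuming a simple worklist over Value users (the helper name is made up; the in-tree version performs additional checks):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Sketch: every transitive user, looking through ConstantExprs, must be an
// instruction whose parent function is F.
static bool allUsersInFunctionSketch(const Value *V, const Function *F) {
  SmallVector<const User *, 8> Worklist(V->user_begin(), V->user_end());
  while (!Worklist.empty()) {
    const User *U = Worklist.pop_back_val();
    if (isa<ConstantExpr>(U)) {
      // Look through the constant expression to its own users.
      Worklist.append(U->user_begin(), U->user_end());
      continue;
    }
    const auto *I = dyn_cast<Instruction>(U);
    if (!I || I->getFunction() != F)
      return false;
  }
  return true;
}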
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
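For orientation, the checks are simple per-type, per-subtarget range tests. An illustrative example of one such test, assuming the ARM-mode word load/store case, where the offset is a 12-bit immediate with a separate add/subtract bit:

#include <cstdint>

// Illustrative only: word-sized LDR/STR in ARM mode encode byte offsets in
// the range -4095..4095.
static bool isLegalARMWordOffsetSketch(int64_t V) {
  return V > -4096 && V < 4096;
}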
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
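Roughly, this implements the AAPCS-VFP homogeneous aggregate rule: every leaf of the aggregate has the same floating-point (or vector) base type and there are at most four members in total. A simplified sketch under those assumptions, covering only float/double leaves (the real helper tracks the base type through an HABaseType enum and also handles half and vector bases):

#include "llvm/IR/DerivedTypes.h"
#include <cstdint>
using namespace llvm;

// Sketch: Base must be passed in as nullptr; it records the first leaf type
// seen, and every other leaf has to match it.
static bool isSimpleHASketch(Type *Ty, Type *&Base, uint64_t &Members) {
  if (auto *AT = dyn_cast<ArrayType>(Ty)) {
    uint64_t Sub = 0;
    if (!isSimpleHASketch(AT->getElementType(), Base, Sub))
      return false;
    Members = Sub * AT->getNumElements();
  } else if (auto *ST = dyn_cast<StructType>(Ty)) {
    Members = 0;
    for (Type *Elt : ST->elements()) {
      uint64_t Sub = 0;
      if (!isSimpleHASketch(Elt, Base, Sub))
        return false;
      Members += Sub;
    }
  } else if (Ty->isFloatTy() || Ty->isDoubleTy()) {
    if (!Base)
      Base = Ty;
    if (Base != Ty)
      return false;
    Members = 1;
  } else {
    return false;
  }
  return Members > 0 && Members <= 4;
}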
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
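A hedged sketch of the direct case (a fuller version would also need to recognize a +0.0 that has been materialized indirectly, for example through a constant-pool load):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch: accept only an explicit +0.0 constant node. APFloat::isPosZero
// distinguishes +0.0 from -0.0.
static bool isPlusZeroSketch(SDValue Op) {
  if (const auto *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  return false;
}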
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
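The clamp being matched bounds a wide value to the range of a narrower signed type. A small sketch of that range test as an assumed helper, using the APInt utilities listed further down:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Sketch: do [Lo, Hi] bounds correspond exactly to the signed range of a
// NarrowBits-wide integer (e.g. [-32768, 32767] for i16)? If so, the
// smin/smax clamp plus truncate can become a saturating narrow.
static bool isSignedSatRangeSketch(const APInt &Lo, const APInt &Hi,
                                   unsigned NarrowBits) {
  unsigned Wide = Lo.getBitWidth();
  return Hi.getBitWidth() == Wide && NarrowBits < Wide &&
         Lo == APInt::getSignedMinValue(NarrowBits).sext(Wide) &&
         Hi == APInt::getSignedMaxValue(NarrowBits).sext(Wide);
}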
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
Module.h This file contains the declarations for the Module class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1479
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
unsigned logBase2() const
Definition: APInt.h:1739
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
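A short, self-contained illustration of a few of the APInt helpers listed above (the concrete values are arbitrary):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

// Illustrative use of getLowBitsSet/getHighBitsSet, the bit-counting
// queries, getSplat, and isSubsetOf.
static void apintSketch() {
  APInt Low8 = APInt::getLowBitsSet(32, 8);    // 0x000000FF
  APInt High8 = APInt::getHighBitsSet(32, 8);  // 0xFF000000
  assert(Low8.popcount() == 8 && Low8.countr_one() == 8);
  assert(High8.countl_zero() == 0 && High8.countr_zero() == 24);

  // getSplat broadcasts a narrow pattern over a wider width.
  APInt Byte(8, 0xAB);
  APInt Splat32 = APInt::getSplat(32, Byte);   // 0xABABABAB
  assert(Splat32.getZExtValue() == 0xABABABABULL);

  // Every bit set in Low8 is also set in 0xFF.
  assert(Low8.isSubsetOf(APInt(32, 0xFF)));
}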
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:349
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:358
bool hasARMOps() const
Definition: ARMSubtarget.h:302
bool supportsTailCall() const
Definition: ARMSubtarget.h:427
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:335
bool hasVFP4Base() const
Definition: ARMSubtarget.h:310
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:238
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
bool useFPVFMx() const
Definition: ARMSubtarget.h:319
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:311
bool isThumb2() const
Definition: ARMSubtarget.h:404
bool isTargetWindows() const
Definition: ARMSubtarget.h:345
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:325
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:242
bool useSjLjEH() const
Definition: ARMSubtarget.h:324
bool isTargetDarwin() const
Definition: ARMSubtarget.h:337
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:250
bool hasVFP2Base() const
Definition: ARMSubtarget.h:308
bool isTargetAndroid() const
Definition: ARMSubtarget.h:389
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:347
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:363
bool hasVFP3Base() const
Definition: ARMSubtarget.h:309
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:323
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:339
unsigned getPreferBranchLogAlignment() const
Definition: ARMSubtarget.h:514
bool hasMinSize() const
Definition: ARMSubtarget.h:402
bool isTargetIOS() const
Definition: ARMSubtarget.h:338
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:304
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:461
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:340
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:313
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:341
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:435
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:429
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:370
bool isTargetLinux() const
Definition: ARMSubtarget.h:342
bool useFPVFMx16() const
Definition: ARMSubtarget.h:322
bool isMClass() const
Definition: ARMSubtarget.h:405
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:317
bool isTargetELF() const
Definition: ARMSubtarget.h:348
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:471
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two adds is IR-canonical.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain of an 'and' with a single-bit mask followed by a compare against zero into one test-style instruction.
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
bool isFloatingPointOperation() const
Definition: Instructions.h:882
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:95
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
The address of a basic block.
Definition: Constants.h:893
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
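A typical query shape for isConstantSplat, sketched with a made-up acceptance criterion (target code would go on to test whether the splatted bits are encodable, for example as a VMOV-style immediate):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch: detect a BUILD_VECTOR that splats a constant of at most 8 bits.
static bool isSmallConstantSplatSketch(SDValue Op) {
  const auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  if (!BVN)
    return false;
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs))
    return false;
  return SplatBitSize <= 8;  // placeholder acceptance criterion
}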
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition: DataLayout.h:227
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:988
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:285
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:563
unsigned getNumElements() const
Definition: DerivedTypes.h:606
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
arg_iterator arg_begin()
Definition: Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:688
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2165
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1902
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2150
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2525
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2145
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2181
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
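As a small usage illustration of the builder methods listed above (packing two i16 values into an i32 is an arbitrary example, not something this lowering does):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: zero-extend both halves, shift the high half into place, then OR.
static Value *packHalvesSketch(IRBuilderBase &B, Value *Lo16, Value *Hi16) {
  Type *I32 = B.getInt32Ty();
  Value *Lo = B.CreateZExt(Lo16, I32, "lo");
  Value *Hi = B.CreateZExt(Hi16, I32, "hi");
  Value *HiShifted = B.CreateShl(Hi, B.getInt32(16), "hi.shifted");
  return B.CreateOr(Lo, HiShifted, "packed");
}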
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
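The MachineInstrBuilder methods above are normally chained off BuildMI. A hypothetical sketch follows; the load opcode and operand layout are placeholders rather than an actual ARM instruction from this file:
static void emitCPLoad(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                       const DebugLoc &DL, const TargetInstrInfo *TII,
                       unsigned LoadOpc, Register DestReg, unsigned CPIdx,
                       MachineMemOperand *MMO) {
  // Build "DestReg = load constant-pool[CPIdx] + 0" and attach a memory
  // operand describing the constant-pool access.
  BuildMI(MBB, MI, DL, TII->get(LoadOpc), DestReg)
      .addConstantPoolIndex(CPIdx)
      .addImm(0)
      .addMemOperand(MMO);
}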
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.

A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns the original (base) alignment of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the node is an UNDEF value.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
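A small sketch of the users() iteration listed above, of the kind used as a guard in DAG combines; the helper and the specific opcode check are illustrative:
static bool allUsersAreStores(SDNode *N) {
  // Walk every node that consumes a result of N; bail out on the first
  // user that is not a plain ISD::STORE.
  for (SDNode *U : N->users())
    if (U->getOpcode() != ISD::STORE)
      return false;
  return true;
}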
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:497
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
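A minimal sketch of getLoad, assuming an i32 load with default alignment and no AA info; the helper name is hypothetical:
static std::pair<SDValue, SDValue> emitWordLoad(SelectionDAG &DAG,
                                                const SDLoc &dl, SDValue Chain,
                                                SDValue Ptr) {
  // Result 0 is the loaded value, result 1 is the output chain; callers must
  // thread the new chain through any later memory operations.
  SDValue Load = DAG.getLoad(MVT::i32, dl, Chain, Ptr, MachinePointerInfo());
  return {Load, Load.getValue(1)};
}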
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
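The getConstant / getTargetConstant split matters during lowering; a short hedged sketch of the distinction (the value 42 and the helper are arbitrary):
static void constantKinds(SelectionDAG &DAG, const SDLoc &dl) {
  // A generic constant participates in legalization and DAG combining.
  SDValue C = DAG.getConstant(42, dl, MVT::i32);
  // A target constant is meant to become an immediate operand of an
  // already-selected node and is not legalized further.
  SDValue TC = DAG.getTargetConstant(42, dl, MVT::i32);
  (void)C;
  (void)TC;
}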
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
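A hedged sketch of the known-bits queries above, here proving that the upper half of an i32 value is zero before narrowing it; the helper name and the 16-bit split are illustrative:
static bool high16BitsKnownZero(SelectionDAG &DAG, SDValue Op) {
  if (Op.getValueType() != MVT::i32)
    return false;
  // Ask the DAG whether all of bits [31:16] are provably zero.
  APInt HighMask = APInt::getHighBitsSet(32, 16);
  return DAG.MaskedValueIsZero(Op, HighMask);
}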
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
void reserve(size_type NumEntries)
Definition: SmallPtrSet.h:112
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallSet.h:168
bool erase(const T &V)
Definition: SmallSet.h:193
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
const unsigned char * bytes_end() const
Definition: StringRef.h:131
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
const unsigned char * bytes_begin() const
Definition: StringRef.h:128
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
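StringSwitch is the usual idiom for classifying short string keys such as inline-asm constraint strings; a generic sketch with made-up keys, assuming llvm/ADT/StringSwitch.h is included:
static unsigned classifyKey(StringRef Name) {
  // Falls through to the Default value when no Case matches.
  return StringSwitch<unsigned>(Name)
      .Case("low", 0)
      .Case("high", 1)
      .Default(~0u);
}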
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
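setOperationAction is normally called from a TargetLowering subclass constructor. A hypothetical sketch follows; the opcode/type/action triples are illustrative, not this file's actual configuration:
struct ExampleTLI : TargetLowering {
  explicit ExampleTLI(const TargetMachine &TM) : TargetLowering(TM) {
    // Expand: replaced by a generic sequence or libcall during legalization.
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    // Custom: routed to this target's LowerOperation hook.
    setOperationAction(ISD::SELECT, MVT::i32, Custom);
    // Legal: the target has a native instruction for this operation.
    setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  }
};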
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:409
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:510
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:645
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
Type * getArrayElementType() const
Definition: Type.h:411
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Section Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left-shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1069
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:512
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1073
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1092
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:157
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1096
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:788
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1087
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1064
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:112
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1646
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1562
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1613
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1593
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1564
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
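Purely illustrative use of the range helpers above on an arbitrary container:

SmallVector<unsigned, 8> Vals = {1, 2, 4, 8};
bool AllNonZero = llvm::all_of(Vals, [](unsigned V) { return V != 0; });
auto It = llvm::find(Vals, 4u);      // iterator to the element equal to 4
auto Tail = llvm::drop_begin(Vals);  // range without the first element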
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
@ Read
Definition: CodeGenData.h:107
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
Definition: MathExtras.h:267
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1558
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
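An illustrative (hypothetical) helper built from the bit-manipulation utilities above, decoding a contiguous bit-field mask into a shift and a width:

// Returns false if Mask is not a single contiguous run of ones.
static bool decodeShiftedMask(uint32_t Mask, unsigned &Shift, unsigned &Width) {
  if (!isShiftedMask_32(Mask))
    return false;
  Shift = llvm::countr_zero(Mask);
  Width = llvm::countr_one(Mask >> Shift);
  return true;
}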
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register, or 3 if a literal pool load is needed.
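A sketch of how the cost helpers might be consulted when choosing between an immediate and its complement; Subtarget is assumed to be the current ARMSubtarget:

// Prefer materializing Imm over ~Imm if it is cheaper for code size.
unsigned Imm = 0x00FF00FF;
bool PreferImm = HasLowerConstantMaterializationCost(Imm, ~Imm, Subtarget,
                                                     /*ForCodesize=*/true);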
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1945
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
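A minimal sketch of the common idiom pairing BuildMI with predOps and condCodeOp to emit an unpredicated, non-flag-setting ARM instruction; MBB, InsertPt, DL, TII, DstReg and SrcReg are assumed to exist in the surrounding code:

BuildMI(MBB, InsertPt, DL, TII->get(ARM::MOVr), DstReg)
    .addReg(SrcReg)
    .add(predOps(ARMCC::AL))  // always-execute predicate
    .add(condCodeOp());       // no 'S' bit, so CPSR is not defined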
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize in bits.
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
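For illustration, a sequential mask as produced by the helper above:

// Yields {0, 1, 2, 3, -1, -1}: four sequential indices followed by two undefs.
SmallVector<int, 16> Mask =
    createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);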
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
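An illustrative guard built from the constant predicates above, as one might write in a DAG combine; Op is an assumed binary SDValue:

// True if Op is (xor X, -1) or its second operand is the all-ones constant.
bool LooksLikeNot = isBitwiseNot(Op) || isAllOnesConstant(Op.getOperand(1));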
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition: Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:458
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the number of elements in the given vector type is a power of 2.
Definition: ValueTypes.h:465
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type, which is chosen by the caller.
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
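A small, self-contained sketch of the EVT queries documented above; Ctx is an assumed LLVMContext:

EVT VecVT = EVT::getVectorVT(Ctx, MVT::f32, 4);        // v4f32
bool Is128 = VecVT.is128BitVector();                   // true
EVT IntVT = VecVT.changeVectorElementTypeToInteger();  // v4i32
uint64_t EltBits = IntVT.getScalarSizeInBits();        // 32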
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:293
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition: KnownBits.h:336
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:804
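An illustrative combination of the KnownBits operations above on two fully-known 32-bit values:

KnownBits L = KnownBits::makeConstant(APInt(32, 12));
KnownBits R = KnownBits::makeConstant(APInt(32, 3));
KnownBits Sum = KnownBits::add(L, R);  // all 32 bits known; the value is 15
KnownBits Wide = Sum.zext(64);         // the same knowledge, zero-extended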
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
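A hedged sketch of attaching pointer info to a frame-index access; MF and FI are assumed to be in scope and are not taken from this file:

MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachinePointerInfo Hi = PtrInfo.getWithOffset(4);  // second word of the slot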
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
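The setters above chain fluently; a minimal sketch of preparing a libcall, where DAG, dl, Chain, Callee, RetTy and Args are assumed to be in scope:

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
    .setDiscardResult(false);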
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.
bool CombineTo(SDValue O, SDValue N)