ARMISelLowering.cpp
1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/MC/MCSchedule.h"
98#include "llvm/Support/Debug.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <optional>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118using namespace llvm::PatternMatch;
119
120#define DEBUG_TYPE "arm-isel"
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125STATISTIC(NumConstpoolPromoted,
126 "Number of constants with their storage promoted into constant pools");
127
128static cl::opt<bool>
129ARMInterworking("arm-interworking", cl::Hidden,
130 cl::desc("Enable / disable ARM interworking (for debugging only)"),
131 cl::init(true));
132
134 "arm-promote-constant", cl::Hidden,
135 cl::desc("Enable / disable promotion of unnamed_addr constants into "
136 "constant pools"),
137 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139 "arm-promote-constant-max-size", cl::Hidden,
140 cl::desc("Maximum size of constant to promote into a constant pool"),
141 cl::init(64));
143 "arm-promote-constant-max-total", cl::Hidden,
144 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145 cl::init(128));
146
148MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150 cl::init(2));
151
152/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153constexpr MVT FlagsVT = MVT::i32;
154
155// The APCS parameter registers.
156static const MCPhysReg GPRArgRegs[] = {
157 ARM::R0, ARM::R1, ARM::R2, ARM::R3
158};
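// R0-R3 are the core argument registers in both APCS and AAPCS; the
// calling-convention lowering in this file uses this array (e.g. for byval
// and variadic arguments) when splitting values between registers and the
// stack.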
159
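// Helper for values crossing a CMSE security boundary (see the CMSE handling
// in LowerCallResult below): truncate the incoming value back to its original
// argument type and re-extend it to i32 on the trusted side, so that the
// upper bits are well-defined regardless of what the untrusted side produced.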
161 SelectionDAG &DAG, const SDLoc &DL) {
163 assert(Arg.ArgVT.bitsLT(MVT::i32));
164 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
165 SDValue Ext =
167 MVT::i32, Trunc);
168 return Ext;
169}
170
171void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
172 if (VT != PromotedLdStVT) {
174 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
175
177 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
178 }
179
180 MVT ElemTy = VT.getVectorElementType();
181 if (ElemTy != MVT::f64)
185 if (ElemTy == MVT::i32) {
190 } else {
195 }
204 if (VT.isInteger()) {
208 }
209
210 // Neon does not support vector divide/remainder operations.
219
220 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
221 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
223 setOperationAction(Opcode, VT, Legal);
224 if (!VT.isFloatingPoint())
225 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
226 setOperationAction(Opcode, VT, Legal);
227}
228
229void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
230 addRegisterClass(VT, &ARM::DPRRegClass);
231 addTypeForNEON(VT, MVT::f64);
232}
233
234void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
235 addRegisterClass(VT, &ARM::DPairRegClass);
236 addTypeForNEON(VT, MVT::v2f64);
237}
238
239void ARMTargetLowering::setAllExpand(MVT VT) {
240 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
241 setOperationAction(Opc, VT, Expand);
242
243 // We support these really simple operations even on types where all
244 // the actual arithmetic has to be broken down into simpler
245 // operations or turned into library calls.
250}
251
252void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
253 LegalizeAction Action) {
254 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
256 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
257}
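// For example, addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal) marks the any-,
// zero- and sign-extending loads from a v4i8 memory type into a v4i32 result
// as legal, which is exactly how the MVE extloads are declared further down.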
258
259void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
260 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
261
262 for (auto VT : IntTypes) {
263 addRegisterClass(VT, &ARM::MQPRRegClass);
293
294 // No native support for these.
304
305 // Vector reductions
315
316 if (!HasMVEFP) {
321 } else {
324 }
325
326 // Pre and Post inc are supported on loads and stores
327 for (unsigned im = (unsigned)ISD::PRE_INC;
333 }
334 }
335
336 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
337 for (auto VT : FloatTypes) {
338 addRegisterClass(VT, &ARM::MQPRRegClass);
339 if (!HasMVEFP)
340 setAllExpand(VT);
341
342 // These are legal or custom whether we have MVE.fp or not
355
356 // Pre and Post inc are supported on loads and stores
357 for (unsigned im = (unsigned)ISD::PRE_INC;
363 }
364
365 if (HasMVEFP) {
373
374 // No native support for these.
389 }
390 }
391
392 // Custom-expand vector reductions that are smaller than legal to prevent
393 // false zero items from being added.
402
403 // We 'support' these types up to bitcast/load/store level, regardless of
404 // MVE integer-only / float support. Only FP data processing on the FP
405 // vector types is inhibited at the integer-only level.
406 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
407 for (auto VT : LongTypes) {
408 addRegisterClass(VT, &ARM::MQPRRegClass);
409 setAllExpand(VT);
415 }
417
418 // We can do bitwise operations on v2i64 vectors
419 setOperationAction(ISD::AND, MVT::v2i64, Legal);
420 setOperationAction(ISD::OR, MVT::v2i64, Legal);
421 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
422
423 // It is legal to extload from v8i8 to v8i16, and from v4i8 or v4i16 to v4i32.
424 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
426 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
427
428 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
434
435 // Some truncating stores are legal too.
436 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
437 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
438 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
439
440 // Pre and Post inc on these are legal, given the correct extends
441 for (unsigned im = (unsigned)ISD::PRE_INC;
443 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
448 }
449 }
450
451 // Predicate types
452 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
453 for (auto VT : pTypes) {
454 addRegisterClass(VT, &ARM::VCCRRegClass);
469
470 if (!HasMVEFP) {
475 }
476 }
480 setOperationAction(ISD::OR, MVT::v2i1, Expand);
486
495}
496
498 const ARMSubtarget &STI)
499 : TargetLowering(TM), Subtarget(&STI) {
500 RegInfo = Subtarget->getRegisterInfo();
501 Itins = Subtarget->getInstrItineraryData();
502
505
506 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
507 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
508 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
509 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
510 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
511 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
513 }
514
515 if (Subtarget->isTargetMachO()) {
516 // Uses VFP for Thumb libfuncs if available.
517 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
518 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
519 static const struct {
520 const RTLIB::Libcall Op;
521 const char * const Name;
522 const ISD::CondCode Cond;
523 } LibraryCalls[] = {
524 // Single-precision floating-point arithmetic.
525 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
528 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
529
530 // Double-precision floating-point arithmetic.
531 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
534 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
535
536 // Single-precision comparisons.
537 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
538 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
539 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
540 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
541 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
542 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
543 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
544
545 // Double-precision comparisons.
546 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
547 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
548 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
549 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
550 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
551 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
552 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
553
554 // Floating-point to integer conversions.
555 // i64 conversions are done via library routines even when generating VFP
556 // instructions, so use the same ones.
557 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
560 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
561
562 // Conversions between floating types.
563 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
564 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
565
566 // Integer to floating-point conversions.
567 // i64 conversions are done via library routines even when generating VFP
568 // instructions, so use the same ones.
569 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
570 // e.g., __floatunsidf vs. __floatunssidfvfp.
571 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
573 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
574 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
575 };
576
577 for (const auto &LC : LibraryCalls) {
578 setLibcallName(LC.Op, LC.Name);
579 if (LC.Cond != ISD::SETCC_INVALID)
580 setCmpLibcallCC(LC.Op, LC.Cond);
581 }
582 }
583 }
584
585 // RTLIB
586 if (Subtarget->isAAPCS_ABI() &&
587 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
588 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
589 static const struct {
590 const RTLIB::Libcall Op;
591 const char * const Name;
592 const CallingConv::ID CC;
593 const ISD::CondCode Cond;
594 } LibraryCalls[] = {
595 // Double-precision floating-point arithmetic helper functions
596 // RTABI chapter 4.1.2, Table 2
597 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601
602 // Double-precision floating-point comparison helper functions
603 // RTABI chapter 4.1.2, Table 3
604 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
606 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
610 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
611
612 // Single-precision floating-point arithmetic helper functions
613 // RTABI chapter 4.1.2, Table 4
614 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618
619 // Single-precision floating-point comparison helper functions
620 // RTABI chapter 4.1.2, Table 5
621 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
623 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
627 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
628
629 // Floating-point to integer conversions.
630 // RTABI chapter 4.1.2, Table 6
631 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639
640 // Conversions between floating types.
641 // RTABI chapter 4.1.2, Table 7
642 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645
646 // Integer to floating-point conversions.
647 // RTABI chapter 4.1.2, Table 8
648 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656
657 // Long long helper functions
658 // RTABI chapter 4.2, Table 9
659 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663
664 // Integer division functions
665 // RTABI chapter 4.3.1
666 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
674 };
675
676 for (const auto &LC : LibraryCalls) {
677 setLibcallName(LC.Op, LC.Name);
678 setLibcallCallingConv(LC.Op, LC.CC);
679 if (LC.Cond != ISD::SETCC_INVALID)
680 setCmpLibcallCC(LC.Op, LC.Cond);
681 }
682
683 // EABI dependent RTLIB
684 if (TM.Options.EABIVersion == EABI::EABI4 ||
685 TM.Options.EABIVersion == EABI::EABI5) {
686 static const struct {
687 const RTLIB::Libcall Op;
688 const char *const Name;
689 const CallingConv::ID CC;
690 const ISD::CondCode Cond;
691 } MemOpsLibraryCalls[] = {
692 // Memory operations
693 // RTABI chapter 4.3.4
694 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
697 };
698
699 for (const auto &LC : MemOpsLibraryCalls) {
700 setLibcallName(LC.Op, LC.Name);
701 setLibcallCallingConv(LC.Op, LC.CC);
702 if (LC.Cond != ISD::SETCC_INVALID)
703 setCmpLibcallCC(LC.Op, LC.Cond);
704 }
705 }
706 }
707
708 if (Subtarget->isTargetWindows()) {
709 static const struct {
710 const RTLIB::Libcall Op;
711 const char * const Name;
712 const CallingConv::ID CC;
713 } LibraryCalls[] = {
714 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
721 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
722 };
723
724 for (const auto &LC : LibraryCalls) {
725 setLibcallName(LC.Op, LC.Name);
726 setLibcallCallingConv(LC.Op, LC.CC);
727 }
728 }
729
730 // Use divmod compiler-rt calls for iOS 5.0 and later.
731 if (Subtarget->isTargetMachO() &&
732 !(Subtarget->isTargetIOS() &&
733 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
734 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
735 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
736 }
737
738 // The half <-> float conversion functions are always soft-float on
739 // non-watchOS platforms, but are needed for some targets which use a
740 // hard-float calling convention by default.
741 if (!Subtarget->isTargetWatchABI()) {
742 if (Subtarget->isAAPCS_ABI()) {
743 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
745 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
746 } else {
747 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
749 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
750 }
751 }
752
753 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
754 // a __gnu_ prefix (which is the default).
755 if (Subtarget->isTargetAEABI()) {
756 static const struct {
757 const RTLIB::Libcall Op;
758 const char * const Name;
759 const CallingConv::ID CC;
760 } LibraryCalls[] = {
761 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
763 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
764 };
765
766 for (const auto &LC : LibraryCalls) {
767 setLibcallName(LC.Op, LC.Name);
768 setLibcallCallingConv(LC.Op, LC.CC);
769 }
770 }
771
772 if (Subtarget->isThumb1Only())
773 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
774 else
775 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
776
777 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
778 Subtarget->hasFPRegs()) {
779 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
780 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
781
786
787 if (!Subtarget->hasVFP2Base())
788 setAllExpand(MVT::f32);
789 if (!Subtarget->hasFP64())
790 setAllExpand(MVT::f64);
791 }
792
793 if (Subtarget->hasFullFP16()) {
794 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
797
800 }
801
802 if (Subtarget->hasBF16()) {
803 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
804 setAllExpand(MVT::bf16);
805 if (!Subtarget->hasFullFP16())
807 } else {
810 }
811
813 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
814 setTruncStoreAction(VT, InnerVT, Expand);
815 addAllExtLoads(VT, InnerVT, Expand);
816 }
817
820
822 }
823
826
829
830 if (Subtarget->hasMVEIntegerOps())
831 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
832
833 // Combine low-overhead loop intrinsics so that we can lower i1 types.
834 if (Subtarget->hasLOB()) {
836 }
837
838 if (Subtarget->hasNEON()) {
839 addDRTypeForNEON(MVT::v2f32);
840 addDRTypeForNEON(MVT::v8i8);
841 addDRTypeForNEON(MVT::v4i16);
842 addDRTypeForNEON(MVT::v2i32);
843 addDRTypeForNEON(MVT::v1i64);
844
845 addQRTypeForNEON(MVT::v4f32);
846 addQRTypeForNEON(MVT::v2f64);
847 addQRTypeForNEON(MVT::v16i8);
848 addQRTypeForNEON(MVT::v8i16);
849 addQRTypeForNEON(MVT::v4i32);
850 addQRTypeForNEON(MVT::v2i64);
851
852 if (Subtarget->hasFullFP16()) {
853 addQRTypeForNEON(MVT::v8f16);
854 addDRTypeForNEON(MVT::v4f16);
855 }
856
857 if (Subtarget->hasBF16()) {
858 addQRTypeForNEON(MVT::v8bf16);
859 addDRTypeForNEON(MVT::v4bf16);
860 }
861 }
862
863 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
864 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
865 // none of Neon, MVE or VFP supports any arithmetic operations on it.
866 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
867 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
868 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
869 // FIXME: Code duplication: FDIV and FREM are expanded always, see
870 // ARMTargetLowering::addTypeForNEON method for details.
871 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
872 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
873 // FIXME: Create unittest.
874 // In other words, find a way to handle "copysign" when it appears in a DAG
875 // with vector operands.
877 // FIXME: Code duplication: SETCC has custom operation action, see
878 // ARMTargetLowering::addTypeForNEON method for details.
880 // FIXME: Create unittest for FNEG and for FABS.
881 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
882 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
884 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
885 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
886 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
887 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
888 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
891 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
894 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
900 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
901 }
902
903 if (Subtarget->hasNEON()) {
904 // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
905 // natively supported for v4f32.
907 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
908 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
909 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
910 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
911 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
914 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
922
923 // Mark v2f32 intrinsics.
925 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
926 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
927 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
928 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
929 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
932 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
940
941 // Neon does not support some operations on v1i64 and v2i64 types.
942 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
943 // Custom handling for some quad-vector types to detect VMULL.
944 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
945 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
946 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
947 // Custom handling for some vector types to avoid expensive expansions
948 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
950 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
952 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
953 // a destination type that is wider than the source, nor does
954 // it have a FP_TO_[SU]INT instruction with a narrower destination than
955 // source.
964
967
968 // NEON does not have a single-instruction CTPOP for vectors with element
969 // types wider than 8 bits. However, custom lowering can leverage the
970 // v8i8/v16i8 vcnt instruction.
977
978 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
979 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
980
981 // NEON does not have a single-instruction CTTZ for vectors.
983 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
986
987 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
988 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
989 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
990 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
991
996
1001
1005 }
1006
1007 // NEON only has FMA instructions as of VFP4.
1008 if (!Subtarget->hasVFP4Base()) {
1009 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1010 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1011 }
1012
1015
1016 // It is legal to extload from v4i8 to v4i16 or v4i32.
1017 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1018 MVT::v2i32}) {
1023 }
1024 }
1025
1026 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1027 MVT::v4i32}) {
1032 }
1033 }
1034
1035 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1042 }
1043 if (Subtarget->hasMVEIntegerOps()) {
1046 ISD::SETCC});
1047 }
1048 if (Subtarget->hasMVEFloatOps()) {
1050 }
1051
1052 if (!Subtarget->hasFP64()) {
1053 // When targeting a floating-point unit with only single-precision
1054 // operations, f64 is legal for the few double-precision instructions which
1055 // are present. However, no double-precision operations other than moves,
1056 // loads and stores are provided by the hardware.
1094 }
1095
1096 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1099 if (Subtarget->hasFullFP16()) {
1102 }
1103 }
1104
1105 if (!Subtarget->hasFP16()) {
1108 }
1109
1111
1112 // ARM does not have floating-point extending loads.
1113 for (MVT VT : MVT::fp_valuetypes()) {
1114 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1115 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1116 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
1117 }
1118
1119 // ... or truncating stores
1120 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1121 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1122 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1123 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
1124 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
1125
1126 // ARM does not have an i1 sign-extending load.
1127 for (MVT VT : MVT::integer_valuetypes())
1128 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1129
1130 // ARM supports all 4 flavors of integer indexed load / store.
1131 if (!Subtarget->isThumb1Only()) {
1132 for (unsigned im = (unsigned)ISD::PRE_INC;
1134 setIndexedLoadAction(im, MVT::i1, Legal);
1135 setIndexedLoadAction(im, MVT::i8, Legal);
1136 setIndexedLoadAction(im, MVT::i16, Legal);
1137 setIndexedLoadAction(im, MVT::i32, Legal);
1138 setIndexedStoreAction(im, MVT::i1, Legal);
1139 setIndexedStoreAction(im, MVT::i8, Legal);
1140 setIndexedStoreAction(im, MVT::i16, Legal);
1141 setIndexedStoreAction(im, MVT::i32, Legal);
1142 }
1143 } else {
1144 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1147 }
1148
1153
1156 if (Subtarget->hasDSP()) {
1165 }
1166 if (Subtarget->hasBaseDSP()) {
1169 }
1170
1171 // i64 operation support.
1174 if (Subtarget->isThumb1Only()) {
1177 }
1178 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1179 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1181
1191
1192 // MVE lowers 64-bit shifts to lsll and lsrl,
1193 // assuming that ISD::SRL and SRA of i64 are already marked custom.
1194 if (Subtarget->hasMVEIntegerOps())
1196
1197 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1198 if (Subtarget->isThumb1Only()) {
1202 }
1203
1204 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1206
1207 // ARM does not have ROTL.
1212 }
1215 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1218 }
1219
1220 // @llvm.readcyclecounter requires the Performance Monitors extension.
1221 // Default to the 0 expansion on unsupported platforms.
1222 // FIXME: Technically there are older ARM CPUs that have
1223 // implementation-specific ways of obtaining this information.
1224 if (Subtarget->hasPerfMon())
1226
1227 // Only ARMv6 has BSWAP.
1228 if (!Subtarget->hasV6Ops())
1230
1231 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1232 : Subtarget->hasDivideInARMMode();
1233 if (!hasDivide) {
1234 // These are expanded into libcalls if the CPU doesn't have a HW divider.
1237 }
1238
1239 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1242
1245 }
1246
1249
1250 // Register based DivRem for AEABI (RTABI 4.2)
1251 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1252 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1253 Subtarget->isTargetWindows()) {
1256 HasStandaloneRem = false;
1257
1258 if (Subtarget->isTargetWindows()) {
1259 const struct {
1260 const RTLIB::Libcall Op;
1261 const char * const Name;
1262 const CallingConv::ID CC;
1263 } LibraryCalls[] = {
1264 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1265 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1266 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1267 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1268
1269 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1270 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1271 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1272 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1273 };
1274
1275 for (const auto &LC : LibraryCalls) {
1276 setLibcallName(LC.Op, LC.Name);
1277 setLibcallCallingConv(LC.Op, LC.CC);
1278 }
1279 } else {
1280 const struct {
1281 const RTLIB::Libcall Op;
1282 const char * const Name;
1283 const CallingConv::ID CC;
1284 } LibraryCalls[] = {
1285 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1286 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1287 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1288 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1289
1290 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1291 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1292 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1293 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1294 };
1295
1296 for (const auto &LC : LibraryCalls) {
1297 setLibcallName(LC.Op, LC.Name);
1298 setLibcallCallingConv(LC.Op, LC.CC);
1299 }
1300 }
1301
1306 } else {
1309 }
1310
1315
1316 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1318
1319 // Use the default implementation.
1321 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1323 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1326
1327 if (Subtarget->isTargetWindows())
1329 else
1331
1332 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1333 // the default expansion.
1334 InsertFencesForAtomic = false;
1335 if (Subtarget->hasAnyDataBarrier() &&
1336 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1337 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1338 // to ldrex/strex loops already.
1340 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1342
1343 // On v8, we have particularly efficient implementations of atomic fences
1344 // if they can be combined with nearby atomic loads and stores.
1345 if (!Subtarget->hasAcquireRelease() ||
1346 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1347 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1348 InsertFencesForAtomic = true;
1349 }
1350 } else {
1351 // If there's anything we can use as a barrier, go through custom lowering
1352 // for ATOMIC_FENCE.
1353 // If the target has DMB in Thumb mode, fences can be inserted.
1354 if (Subtarget->hasDataBarrier())
1355 InsertFencesForAtomic = true;
1356
1358 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1359
1360 // Set them all for libcall, which will force libcalls.
1373 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1374 // Unordered/Monotonic case.
1375 if (!InsertFencesForAtomic) {
1378 }
1379 }
1380
1381 // Compute supported atomic widths.
1382 if (Subtarget->isTargetLinux() ||
1383 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1384 // For targets where __sync_* routines are reliably available, we use them
1385 // if necessary.
1386 //
1387 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1388 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1389 //
1390 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1391 // such targets should provide __sync_* routines, which use the ARM mode
1392 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1393 // encoding; see ARMISD::MEMBARRIER_MCR.)
1395 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1396 Subtarget->hasForced32BitAtomics()) {
1397 // Cortex-M cores (other than Cortex-M0) have 32-bit atomics.
1399 } else {
1400 // We can't assume anything about other targets; just use libatomic
1401 // routines.
1403 }
1404
1406
1408
1409 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1410 if (!Subtarget->hasV6Ops()) {
1413 }
1415
1416 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1417 !Subtarget->isThumb1Only()) {
1418 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1419 // iff target supports vfp2.
1429 }
1430
1431 // We want to custom lower some of our intrinsics.
1436 if (Subtarget->useSjLjEH())
1437 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1438
1448 if (Subtarget->hasFullFP16()) {
1452 }
1453
1455
1458 if (Subtarget->hasFullFP16())
1462 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1463
1464 // We don't support sin/cos/fmod/copysign/pow
1473 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1474 !Subtarget->isThumb1Only()) {
1477 }
1480
1481 if (!Subtarget->hasVFP4Base()) {
1484 }
1485
1486 // Various VFP goodness
1487 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1488 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1489 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1492 }
1493
1494 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1495 if (!Subtarget->hasFP16()) {
1498 }
1499
1500 // Strict floating-point comparisons need custom lowering.
1507 }
1508
1509 // Use __sincos_stret if available.
1510 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1511 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1514 }
1515
1516 // FP-ARMv8 implements a lot of rounding-like FP operations.
1517 if (Subtarget->hasFPARMv8Base()) {
1526 if (Subtarget->hasNEON()) {
1531 }
1532
1533 if (Subtarget->hasFP64()) {
1542 }
1543 }
1544
1545 // FP16 often need to be promoted to call lib functions
1546 if (Subtarget->hasFullFP16()) {
1561
1563 }
1564
1565 if (Subtarget->hasNEON()) {
1566 // vmin and vmax aren't available in a scalar form, so we can use
1567 // a NEON instruction with an undef lane instead.
1576
1577 if (Subtarget->hasFullFP16()) {
1582
1587 }
1588 }
1589
1590 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1591 // it, but it's just a wrapper around ldexp.
1592 if (Subtarget->isTargetWindows()) {
1594 if (isOperationExpand(Op, MVT::f32))
1595 setOperationAction(Op, MVT::f32, Promote);
1596 }
1597
1598 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1599 // isn't legal.
1601 if (isOperationExpand(Op, MVT::f16))
1602 setOperationAction(Op, MVT::f16, Promote);
1603
1604 // We have target-specific dag combine patterns for the following nodes:
1605 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1608
1609 if (Subtarget->hasMVEIntegerOps())
1611
1612 if (Subtarget->hasV6Ops())
1614 if (Subtarget->isThumb1Only())
1616 // Attempt to lower smin/smax to ssat/usat
1617 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1618 Subtarget->isThumb2()) {
1620 }
1621
1623
1624 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1625 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1627 else
1629
1630 //// temporary - rewrite interface to use type
1633 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1635 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1637
1638 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1639 // are at least 4 bytes aligned.
1641
1642 // Prefer likely predicted branches to selects on out-of-order cores.
1643 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1644
1647 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1648
1649 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1650}
1651
1653 return Subtarget->useSoftFloat();
1654}
1655
1656// FIXME: It might make sense to define the representative register class as the
1657// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1658// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1659// SPR's representative would be DPR_VFP2. This should work well if register
1660// pressure tracking were modified such that a register use would increment the
1661// pressure of the register class's representative and all of its super
1662// classes' representatives transitively. We have not implemented this because
1663// of the difficulty prior to coalescing of modeling operand register classes
1664// due to the common occurrence of cross class copies and subregister insertions
1665// and extractions.
1666std::pair<const TargetRegisterClass *, uint8_t>
1668 MVT VT) const {
1669 const TargetRegisterClass *RRC = nullptr;
1670 uint8_t Cost = 1;
1671 switch (VT.SimpleTy) {
1672 default:
1674 // Use DPR as representative register class for all floating point
1675// and vector types. Since there are 32 SPR registers and 32 DPR registers,
1676 // the cost is 1 for both f32 and f64.
1677 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1678 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1679 RRC = &ARM::DPRRegClass;
1680 // When NEON is used for SP, only half of the register file is available
1681 // because operations that define both SP and DP results will be constrained
1682 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1683 // coalescing by double-counting the SP regs. See the FIXME above.
1684 if (Subtarget->useNEONForSinglePrecisionFP())
1685 Cost = 2;
1686 break;
1687 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1688 case MVT::v4f32: case MVT::v2f64:
1689 RRC = &ARM::DPRRegClass;
1690 Cost = 2;
1691 break;
1692 case MVT::v4i64:
1693 RRC = &ARM::DPRRegClass;
1694 Cost = 4;
1695 break;
1696 case MVT::v8i64:
1697 RRC = &ARM::DPRRegClass;
1698 Cost = 8;
1699 break;
1700 }
1701 return std::make_pair(RRC, Cost);
1702}
1703
1704const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1705#define MAKE_CASE(V) \
1706 case V: \
1707 return #V;
1708 switch ((ARMISD::NodeType)Opcode) {
1710 break;
1913#undef MAKE_CASE
1914 }
1915 return nullptr;
1916}
1917
1919 EVT VT) const {
1920 if (!VT.isVector())
1921 return getPointerTy(DL);
1922
1923 // MVE has a predicate register.
1924 if ((Subtarget->hasMVEIntegerOps() &&
1925 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1926 VT == MVT::v16i8)) ||
1927 (Subtarget->hasMVEFloatOps() &&
1928 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1929 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1931}
1932
1933/// getRegClassFor - Return the register class that should be used for the
1934/// specified value type.
1935const TargetRegisterClass *
1936ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1937 (void)isDivergent;
1938 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1939 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1940 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1941 // MVE Q registers.
1942 if (Subtarget->hasNEON()) {
1943 if (VT == MVT::v4i64)
1944 return &ARM::QQPRRegClass;
1945 if (VT == MVT::v8i64)
1946 return &ARM::QQQQPRRegClass;
1947 }
1948 if (Subtarget->hasMVEIntegerOps()) {
1949 if (VT == MVT::v4i64)
1950 return &ARM::MQQPRRegClass;
1951 if (VT == MVT::v8i64)
1952 return &ARM::MQQQQPRRegClass;
1953 }
1955}
1956
1957// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1958// source/dest is aligned and the copy size is large enough. We therefore want
1959// to align such objects passed to memory intrinsics.
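// Illustratively, when a local alloca is passed to llvm.memcpy, the generic
// code consulting this hook may raise the alloca's alignment to 8 on
// v6-and-later non-M-class cores so the expansion can use 8-byte-aligned
// LDM/STM; other cores keep the 4-byte preference returned below.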
1961 Align &PrefAlign) const {
1962 if (!isa<MemIntrinsic>(CI))
1963 return false;
1964 MinSize = 8;
1965 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1966 // cycle faster than 4-byte aligned LDM.
1967 PrefAlign =
1968 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1969 return true;
1970}
1971
1972// Create a fast isel object.
1973FastISel *
1975 const TargetLibraryInfo *libInfo) const {
1976 return ARM::createFastISel(funcInfo, libInfo);
1977}
1978
1980 unsigned NumVals = N->getNumValues();
1981 if (!NumVals)
1982 return Sched::RegPressure;
1983
1984 for (unsigned i = 0; i != NumVals; ++i) {
1985 EVT VT = N->getValueType(i);
1986 if (VT == MVT::Glue || VT == MVT::Other)
1987 continue;
1988 if (VT.isFloatingPoint() || VT.isVector())
1989 return Sched::ILP;
1990 }
1991
1992 if (!N->isMachineOpcode())
1993 return Sched::RegPressure;
1994
1995 // Loads are scheduled for latency even if the instruction itinerary
1996 // is not available.
1997 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1998 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1999
2000 if (MCID.getNumDefs() == 0)
2001 return Sched::RegPressure;
2002 if (!Itins->isEmpty() &&
2003 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
2004 return Sched::ILP;
2005
2006 return Sched::RegPressure;
2007}
2008
2009//===----------------------------------------------------------------------===//
2010// Lowering Code
2011//===----------------------------------------------------------------------===//
2012
2013static bool isSRL16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SRL)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021static bool isSRA16(const SDValue &Op) {
2022 if (Op.getOpcode() != ISD::SRA)
2023 return false;
2024 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2025 return Const->getZExtValue() == 16;
2026 return false;
2027}
2028
2029static bool isSHL16(const SDValue &Op) {
2030 if (Op.getOpcode() != ISD::SHL)
2031 return false;
2032 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2033 return Const->getZExtValue() == 16;
2034 return false;
2035}
2036
2037// Check for a signed 16-bit value. We special-case SRA because it makes
2038// things simpler when also looking for SRAs that aren't sign-extending a
2039// smaller value. Without the check, we'd need to take extra care with
2040// checking order for some operations.
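// Rationale: an i32 value fits in a signed 16-bit range exactly when its top
// 17 bits are all copies of the sign bit, e.g. 32767 (0x00007FFF) has 17
// known sign bits and qualifies, while 32768 (0x00008000) has only 16.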
2041static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2042 if (isSRA16(Op))
2043 return isSHL16(Op.getOperand(0));
2044 return DAG.ComputeNumSignBits(Op) == 17;
2045}
2046
2047/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
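/// (For example, SETUGT maps to ARMCC::HI, the unsigned "higher" condition,
/// which is true after a CMP when C is set and Z is clear.)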
2049 switch (CC) {
2050 default: llvm_unreachable("Unknown condition code!");
2051 case ISD::SETNE: return ARMCC::NE;
2052 case ISD::SETEQ: return ARMCC::EQ;
2053 case ISD::SETGT: return ARMCC::GT;
2054 case ISD::SETGE: return ARMCC::GE;
2055 case ISD::SETLT: return ARMCC::LT;
2056 case ISD::SETLE: return ARMCC::LE;
2057 case ISD::SETUGT: return ARMCC::HI;
2058 case ISD::SETUGE: return ARMCC::HS;
2059 case ISD::SETULT: return ARMCC::LO;
2060 case ISD::SETULE: return ARMCC::LS;
2061 }
2062}
2063
2064/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
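/// Some unordered comparisons need two ARM conditions after a VCMP: e.g.
/// SETUEQ ("unordered or equal") becomes EQ plus a second check of VS, since
/// a NaN operand sets the V flag. Callers receiving CondCode2 != ARMCC::AL
/// must test both conditions.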
2066 ARMCC::CondCodes &CondCode2) {
2067 CondCode2 = ARMCC::AL;
2068 switch (CC) {
2069 default: llvm_unreachable("Unknown FP condition!");
2070 case ISD::SETEQ:
2071 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2072 case ISD::SETGT:
2073 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2074 case ISD::SETGE:
2075 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2076 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2077 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2078 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2079 case ISD::SETO: CondCode = ARMCC::VC; break;
2080 case ISD::SETUO: CondCode = ARMCC::VS; break;
2081 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2082 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2083 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2084 case ISD::SETLT:
2085 case ISD::SETULT: CondCode = ARMCC::LT; break;
2086 case ISD::SETLE:
2087 case ISD::SETULE: CondCode = ARMCC::LE; break;
2088 case ISD::SETNE:
2089 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2090 }
2091}
2092
2093//===----------------------------------------------------------------------===//
2094// Calling Convention Implementation
2095//===----------------------------------------------------------------------===//
2096
2097/// getEffectiveCallingConv - Get the effective calling convention, taking into
2098/// account presence of floating point hardware and calling convention
2099/// limitations, such as support for variadic functions.
2101ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2102 bool isVarArg) const {
2103 switch (CC) {
2104 default:
2105 report_fatal_error("Unsupported calling convention");
2108 case CallingConv::GHC:
2110 return CC;
2116 case CallingConv::Swift:
2119 case CallingConv::C:
2120 case CallingConv::Tail:
2121 if (!Subtarget->isAAPCS_ABI())
2122 return CallingConv::ARM_APCS;
2123 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2124 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2125 !isVarArg)
2127 else
2129 case CallingConv::Fast:
2131 if (!Subtarget->isAAPCS_ABI()) {
2132 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2133 return CallingConv::Fast;
2134 return CallingConv::ARM_APCS;
2135 } else if (Subtarget->hasVFP2Base() &&
2136 !Subtarget->isThumb1Only() && !isVarArg)
2138 else
2140 }
2141}
2142
2144 bool isVarArg) const {
2145 return CCAssignFnForNode(CC, false, isVarArg);
2146}
2147
2149 bool isVarArg) const {
2150 return CCAssignFnForNode(CC, true, isVarArg);
2151}
2152
2153/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2154/// CallingConvention.
2155CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2156 bool Return,
2157 bool isVarArg) const {
2158 switch (getEffectiveCallingConv(CC, isVarArg)) {
2159 default:
2160 report_fatal_error("Unsupported calling convention");
2162 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2164 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2166 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2167 case CallingConv::Fast:
2168 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2169 case CallingConv::GHC:
2170 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2172 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2174 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2176 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2177 }
2178}
2179
2180SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2181 MVT LocVT, MVT ValVT, SDValue Val) const {
2182 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2183 Val);
2184 if (Subtarget->hasFullFP16()) {
2185 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2186 } else {
2187 Val = DAG.getNode(ISD::TRUNCATE, dl,
2188 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2189 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2190 }
2191 return Val;
2192}
2193
2194SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2195 MVT LocVT, MVT ValVT,
2196 SDValue Val) const {
2197 if (Subtarget->hasFullFP16()) {
2198 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2199 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2200 } else {
2201 Val = DAG.getNode(ISD::BITCAST, dl,
2202 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2203 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2204 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2205 }
2206 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2207}
2208
2209/// LowerCallResult - Lower the result values of a call into the
2210/// appropriate copies out of appropriate physical registers.
2211SDValue ARMTargetLowering::LowerCallResult(
2212 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2213 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2214 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2215 SDValue ThisVal, bool isCmseNSCall) const {
2216 // Assign locations to each value returned by this call.
2218 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2219 *DAG.getContext());
2220 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2221
2222 // Copy all of the result registers out of their specified physreg.
2223 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2224 CCValAssign VA = RVLocs[i];
2225
2226 // Pass 'this' value directly from the argument to return value, to avoid
2227 // reg unit interference
2228 if (i == 0 && isThisReturn) {
2229 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2230 "unexpected return calling convention register assignment");
2231 InVals.push_back(ThisVal);
2232 continue;
2233 }
2234
2235 SDValue Val;
2236 if (VA.needsCustom() &&
2237 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2238 // Handle f64 or half of a v2f64.
2239 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2240 InGlue);
2241 Chain = Lo.getValue(1);
2242 InGlue = Lo.getValue(2);
2243 VA = RVLocs[++i]; // skip ahead to next loc
2244 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2245 InGlue);
2246 Chain = Hi.getValue(1);
2247 InGlue = Hi.getValue(2);
2248 if (!Subtarget->isLittle())
2249 std::swap (Lo, Hi);
2250 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2251
2252 if (VA.getLocVT() == MVT::v2f64) {
2253 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2254 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2255 DAG.getConstant(0, dl, MVT::i32));
2256
2257 VA = RVLocs[++i]; // skip ahead to next loc
2258 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2259 Chain = Lo.getValue(1);
2260 InGlue = Lo.getValue(2);
2261 VA = RVLocs[++i]; // skip ahead to next loc
2262 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2263 Chain = Hi.getValue(1);
2264 InGlue = Hi.getValue(2);
2265 if (!Subtarget->isLittle())
2266 std::swap (Lo, Hi);
2267 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2268 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2269 DAG.getConstant(1, dl, MVT::i32));
2270 }
2271 } else {
2272 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2273 InGlue);
2274 Chain = Val.getValue(1);
2275 InGlue = Val.getValue(2);
2276 }
2277
2278 switch (VA.getLocInfo()) {
2279 default: llvm_unreachable("Unknown loc info!");
2280 case CCValAssign::Full: break;
2281 case CCValAssign::BCvt:
2282 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2283 break;
2284 }
2285
2286 // f16 arguments have their size extended to 4 bytes and passed as if they
2287 // had been copied to the LSBs of a 32-bit register.
2288 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2289 if (VA.needsCustom() &&
2290 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2291 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2292
2293 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2294 // is less than 32 bits must be sign- or zero-extended after the call for
2295 // security reasons. Although the ABI mandates an extension done by the
2296 // callee, the latter cannot be trusted to follow the rules of the ABI.
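    // For example, if the secure caller expects a zero-extended i8 result in
    // r0 and branches on it, a buggy or malicious non-secure callee could
    // leave stale data in bits 8-31; re-extending here keeps those bits
    // well-defined before they influence secure-side control flow.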
2297 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2298 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2299 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2300 Val = handleCMSEValue(Val, Arg, DAG, dl);
2301
2302 InVals.push_back(Val);
2303 }
2304
2305 return Chain;
2306}
2307
2308std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2309 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2310 bool IsTailCall, int SPDiff) const {
2311 SDValue DstAddr;
2312 MachinePointerInfo DstInfo;
2313 int32_t Offset = VA.getLocMemOffset();
2315
2316 if (IsTailCall) {
2317 Offset += SPDiff;
2318 auto PtrVT = getPointerTy(DAG.getDataLayout());
2319 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2320 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2321 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2322 DstInfo =
2324 } else {
2325 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2326 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2327 StackPtr, PtrOff);
2328 DstInfo =
2330 }
2331
2332 return std::make_pair(DstAddr, DstInfo);
2333}
2334
2335// Returns the type of copying which is required to set up a byval argument to
2336// a tail-called function. This isn't needed for non-tail calls, because they
2337// always need the equivalent of CopyOnce, but tail-calls sometimes need two copies to
2338// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2339// optimised to zero copies when forwarding an argument from the caller's
2340// caller (NoCopy).
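// An illustrative CopyViaTemp case: a tail call forwarding an incoming byval
// argument into a different outgoing argument slot may have to overwrite
// stack memory that still holds another argument it has yet to read, so the
// value is staged in a local temporary first.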
2341ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2342 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2345
2346 // Globals are always safe to copy from.
2347 if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2348 return CopyOnce;
2349
2350 // Can only analyse frame index nodes, conservatively assume we need a
2351 // temporary.
2352 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2353 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2354 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2355 return CopyViaTemp;
2356
2357 int SrcFI = SrcFrameIdxNode->getIndex();
2358 int DstFI = DstFrameIdxNode->getIndex();
2359 assert(MFI.isFixedObjectIndex(DstFI) &&
2360 "byval passed in non-fixed stack slot");
2361
2362 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2363 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2364
2365 // If the source is in the local frame, then the copy to the argument memory
2366 // is always valid.
2367 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2368 if (!FixedSrc ||
2369 (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2370 return CopyOnce;
2371
2372 // In the case of byval arguments split between registers and the stack,
2373 // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2374 // stack portion, but the Src SDValue will refer to the full value, including
2375 // the local stack memory that the register portion gets stored into. We only
2376 // need to compare them for equality, so normalise on the full value version.
2377 uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2378 DstOffset -= RegSize;
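// For example, for a 16-byte byval whose first 8 bytes were passed in registers,
// DstFI only describes the 8 stack bytes, so RegSize (8 here) is subtracted
// before comparing against SrcOffset, which describes the full 16-byte object.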
2379
2380 // If the value is already in the correct location, then no copying is
2381 // needed. If not, then we need to copy via a temporary.
2382 if (SrcOffset == DstOffset)
2383 return NoCopy;
2384 else
2385 return CopyViaTemp;
2386}
2387
2388void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2389 SDValue Chain, SDValue &Arg,
2390 RegsToPassVector &RegsToPass,
2391 CCValAssign &VA, CCValAssign &NextVA,
2392 SDValue &StackPtr,
2393 SmallVectorImpl<SDValue> &MemOpChains,
2394 bool IsTailCall,
2395 int SPDiff) const {
2396 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2397 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2398 unsigned id = Subtarget->isLittle() ? 0 : 1;
2399 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2400
2401 if (NextVA.isRegLoc())
2402 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2403 else {
2404 assert(NextVA.isMemLoc());
2405 if (!StackPtr.getNode())
2406 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2407 getPointerTy(DAG.getDataLayout()));
2408
2409 SDValue DstAddr;
2410 MachinePointerInfo DstInfo;
2411 std::tie(DstAddr, DstInfo) =
2412 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2413 MemOpChains.push_back(
2414 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2415 }
2416}
2417
2418static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2419 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2420 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2421}
2422
2423/// LowerCall - Lowering a call into a callseq_start <-
2424/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2425/// nodes.
2426SDValue
2427ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2428 SmallVectorImpl<SDValue> &InVals) const {
2429 SelectionDAG &DAG = CLI.DAG;
2430 SDLoc &dl = CLI.DL;
2431 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2432 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2433 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2434 SDValue Chain = CLI.Chain;
2435 SDValue Callee = CLI.Callee;
2436 bool &isTailCall = CLI.IsTailCall;
2437 CallingConv::ID CallConv = CLI.CallConv;
2438 bool doesNotRet = CLI.DoesNotReturn;
2439 bool isVarArg = CLI.IsVarArg;
2440
2441 MachineFunction &MF = DAG.getMachineFunction();
2442 MachineFrameInfo &MFI = MF.getFrameInfo();
2443 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2444 MachineFunction::CallSiteInfo CSInfo;
2445 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2446 bool isThisReturn = false;
2447 bool isCmseNSCall = false;
2448 bool isSibCall = false;
2449 bool PreferIndirect = false;
2450 bool GuardWithBTI = false;
2451
2452 // Analyze operands of the call, assigning locations to each operand.
2453 SmallVector<CCValAssign, 16> ArgLocs;
2454 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2455 *DAG.getContext());
2456 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2457
2458 // Lower 'returns_twice' calls to a pseudo-instruction.
2459 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2460 !Subtarget->noBTIAtReturnTwice())
2461 GuardWithBTI = AFI->branchTargetEnforcement();
2462
2463 // Determine whether this is a non-secure function call.
2464 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2465 isCmseNSCall = true;
2466
2467 // Disable tail calls if they're not supported.
2468 if (!Subtarget->supportsTailCall())
2469 isTailCall = false;
2470
2471 // For both the non-secure calls and the returns from a CMSE entry function,
2472 // the function needs to do some extra work after the call, or before the
2473 // return, respectively, thus it cannot end with a tail call
2474 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2475 isTailCall = false;
2476
2477 if (isa<GlobalAddressSDNode>(Callee)) {
2478 // If we're optimizing for minimum size and the function is called three or
2479 // more times in this block, we can improve codesize by calling indirectly
2480 // as BLXr has a 16-bit encoding.
2481 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2482 if (CLI.CB) {
2483 auto *BB = CLI.CB->getParent();
2484 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2485 count_if(GV->users(), [&BB](const User *U) {
2486 return isa<Instruction>(U) &&
2487 cast<Instruction>(U)->getParent() == BB;
2488 }) > 2;
2489 }
2490 }
2491 if (isTailCall) {
2492 // Check if it's really possible to do a tail call.
2493 isTailCall =
2494 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2495
2496 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2497 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2498 isSibCall = true;
2499
2500 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2501 // detected sibcalls.
2502 if (isTailCall)
2503 ++NumTailCalls;
2504 }
2505
2506 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2507 report_fatal_error("failed to perform tail call elimination on a call "
2508 "site marked musttail");
2509
2510 // Get a count of how many bytes are to be pushed on the stack.
2511 unsigned NumBytes = CCInfo.getStackSize();
2512
2513 // SPDiff is the byte offset of the call's argument area from the callee's.
2514 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2515 // by this amount for a tail call. In a sibling call it must be 0 because the
2516 // caller will deallocate the entire stack and the callee still expects its
2517 // arguments to begin at SP+0. Completely unused for non-tail calls.
2518 int SPDiff = 0;
2519
2520 if (isTailCall && !isSibCall) {
2521 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2522 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2523
2524 // Since callee will pop argument stack as a tail call, we must keep the
2525 // popped size 16-byte aligned.
2526 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
2527 assert(StackAlign && "data layout string is missing stack alignment");
2528 NumBytes = alignTo(NumBytes, *StackAlign);
2529
2530 // SPDiff will be negative if this tail call requires more space than we
2531 // would automatically have in our incoming argument space. Positive if we
2532 // can actually shrink the stack.
2533 SPDiff = NumReusableBytes - NumBytes;
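// For example, if the caller received 16 bytes of stack arguments but this
// tail call needs 32 bytes of outgoing arguments, SPDiff is -16 and extra
// space is reserved below; a positive SPDiff means the callee's arguments
// fit in less space than the caller's did.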
2534
2535 // If this call requires more stack than we have available from
2536 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2537 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2538 AFI->setArgRegsSaveSize(-SPDiff);
2539 }
2540
2541 if (isSibCall) {
2542 // For sibling tail calls, memory operands are available in our caller's stack.
2543 NumBytes = 0;
2544 } else {
2545 // Adjust the stack pointer for the new arguments...
2546 // These operations are automatically eliminated by the prolog/epilog pass
2547 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2548 }
2549
2550 SDValue StackPtr =
2551 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2552
2553 RegsToPassVector RegsToPass;
2554 SmallVector<SDValue, 8> MemOpChains;
2555
2556 // If we are doing a tail-call, any byval arguments will be written to stack
2557 // space which was used for incoming arguments. If any of the values being used
2558 // are incoming byval arguments to this function, then they might be
2559 // overwritten by the stores of the outgoing arguments. To avoid this, we
2560 // need to make a temporary copy of them in local stack space, then copy back
2561 // to the argument area.
2562 DenseMap<unsigned, SDValue> ByValTemporaries;
2563 SDValue ByValTempChain;
2564 if (isTailCall) {
2565 SmallVector<SDValue, 8> ByValCopyChains;
2566 for (const CCValAssign &VA : ArgLocs) {
2567 unsigned ArgIdx = VA.getValNo();
2568 SDValue Src = OutVals[ArgIdx];
2569 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2570
2571 if (!Flags.isByVal())
2572 continue;
2573
2574 SDValue Dst;
2575 MachinePointerInfo DstInfo;
2576 std::tie(Dst, DstInfo) =
2577 computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2578 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2579
2580 if (Copy == NoCopy) {
2581 // If the argument is already at the correct offset on the stack
2582 // (because we are forwarding a byval argument from our caller), we
2583 // don't need any copying.
2584 continue;
2585 } else if (Copy == CopyOnce) {
2586 // If the argument is in our local stack frame, no other argument
2587 // preparation can clobber it, so we can copy it to the final location
2588 // later.
2589 ByValTemporaries[ArgIdx] = Src;
2590 } else {
2591 assert(Copy == CopyViaTemp && "unexpected enum value");
2592 // If we might be copying this argument from the outgoing argument
2593 // stack area, we need to copy via a temporary in the local stack
2594 // frame.
2595 int TempFrameIdx = MFI.CreateStackObject(
2596 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2597 SDValue Temp =
2598 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2599
2600 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2601 SDValue AlignNode =
2602 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2603
2604 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2605 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2606 ByValCopyChains.push_back(
2607 DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2608 ByValTemporaries[ArgIdx] = Temp;
2609 }
2610 }
2611 if (!ByValCopyChains.empty())
2612 ByValTempChain =
2613 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2614 }
2615
2616 // During a tail call, stores to the argument area must happen after all of
2617 // the function's incoming arguments have been loaded because they may alias.
2618 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2619 // there's no point in doing so repeatedly so this tracks whether that's
2620 // happened yet.
2621 bool AfterFormalArgLoads = false;
2622
2623 // Walk the register/memloc assignments, inserting copies/loads. In the case
2624 // of tail call optimization, arguments are handled later.
2625 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2626 i != e;
2627 ++i, ++realArgIdx) {
2628 CCValAssign &VA = ArgLocs[i];
2629 SDValue Arg = OutVals[realArgIdx];
2630 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2631 bool isByVal = Flags.isByVal();
2632
2633 // Promote the value if needed.
2634 switch (VA.getLocInfo()) {
2635 default: llvm_unreachable("Unknown loc info!");
2636 case CCValAssign::Full: break;
2637 case CCValAssign::SExt:
2638 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2639 break;
2640 case CCValAssign::ZExt:
2641 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2642 break;
2643 case CCValAssign::AExt:
2644 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2645 break;
2646 case CCValAssign::BCvt:
2647 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2648 break;
2649 }
2650
2651 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2652 Chain = DAG.getStackArgumentTokenFactor(Chain);
2653 if (ByValTempChain)
2654 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
2655 ByValTempChain);
2656 AfterFormalArgLoads = true;
2657 }
2658
2659 // f16 arguments have their size extended to 4 bytes and passed as if they
2660 // had been copied to the LSBs of a 32-bit register.
2661 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2662 if (VA.needsCustom() &&
2663 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2664 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2665 } else {
2666 // f16 arguments could have been extended prior to argument lowering.
2667 // Mask such arguments if this is a CMSE nonsecure call.
2668 auto ArgVT = Outs[realArgIdx].ArgVT;
2669 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2670 auto LocBits = VA.getLocVT().getSizeInBits();
2671 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2672 SDValue Mask =
2673 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2674 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2675 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2676 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2677 }
2678 }
2679
2680 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2681 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2682 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2683 DAG.getConstant(0, dl, MVT::i32));
2684 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2685 DAG.getConstant(1, dl, MVT::i32));
2686
2687 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2688 StackPtr, MemOpChains, isTailCall, SPDiff);
2689
2690 VA = ArgLocs[++i]; // skip ahead to next loc
2691 if (VA.isRegLoc()) {
2692 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2693 StackPtr, MemOpChains, isTailCall, SPDiff);
2694 } else {
2695 assert(VA.isMemLoc());
2696 SDValue DstAddr;
2697 MachinePointerInfo DstInfo;
2698 std::tie(DstAddr, DstInfo) =
2699 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2700 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2701 }
2702 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2703 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2704 StackPtr, MemOpChains, isTailCall, SPDiff);
2705 } else if (VA.isRegLoc()) {
2706 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2707 Outs[0].VT == MVT::i32) {
2708 assert(VA.getLocVT() == MVT::i32 &&
2709 "unexpected calling convention register assignment");
2710 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2711 "unexpected use of 'returned'");
2712 isThisReturn = true;
2713 }
2714 const TargetOptions &Options = DAG.getTarget().Options;
2715 if (Options.EmitCallSiteInfo)
2716 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2717 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2718 } else if (isByVal) {
2719 assert(VA.isMemLoc());
2720 unsigned offset = 0;
2721
2722 // True if this byval aggregate will be split between registers
2723 // and memory.
2724 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2725 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2726
2727 SDValue ByValSrc;
2728 bool NeedsStackCopy;
2729 if (ByValTemporaries.contains(realArgIdx)) {
2730 ByValSrc = ByValTemporaries[realArgIdx];
2731 NeedsStackCopy = true;
2732 } else {
2733 ByValSrc = Arg;
2734 NeedsStackCopy = !isTailCall;
2735 }
2736
2737 // If part of the argument is in registers, load them.
2738 if (CurByValIdx < ByValArgsCount) {
2739 unsigned RegBegin, RegEnd;
2740 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2741
2742 EVT PtrVT =
2743 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2744 unsigned int i, j;
2745 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2746 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2747 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
2748 SDValue Load =
2749 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2750 DAG.InferPtrAlign(AddArg));
2751 MemOpChains.push_back(Load.getValue(1));
2752 RegsToPass.push_back(std::make_pair(j, Load));
2753 }
2754
2755 // If the parameter size exceeds the register area, the "offset" value
2756 // helps us calculate the stack slot for the remaining part properly.
2757 offset = RegEnd - RegBegin;
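// For example, if r2 and r3 held the first part of the byval, offset == 2 and
// the stack copy below starts 4 * offset == 8 bytes into the byval source.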
2758
2759 CCInfo.nextInRegsParam();
2760 }
2761
2762 // If the memory part of the argument isn't already in the correct place
2763 // (which can happen with tail calls), copy it into the argument area.
2764 if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2765 auto PtrVT = getPointerTy(DAG.getDataLayout());
2766 SDValue Dst;
2767 MachinePointerInfo DstInfo;
2768 std::tie(Dst, DstInfo) =
2769 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2770 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2771 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
2772 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2773 MVT::i32);
2774 SDValue AlignNode =
2775 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2776
2777 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2778 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2779 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2780 Ops));
2781 }
2782 } else {
2783 assert(VA.isMemLoc());
2784 SDValue DstAddr;
2785 MachinePointerInfo DstInfo;
2786 std::tie(DstAddr, DstInfo) =
2787 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2788
2789 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2790 MemOpChains.push_back(Store);
2791 }
2792 }
2793
2794 if (!MemOpChains.empty())
2795 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2796
2797 // Build a sequence of copy-to-reg nodes chained together with token chain
2798 // and flag operands which copy the outgoing args into the appropriate regs.
2799 SDValue InGlue;
2800 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2801 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2802 RegsToPass[i].second, InGlue);
2803 InGlue = Chain.getValue(1);
2804 }
2805
2806 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2807 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2808 // node so that legalize doesn't hack it.
2809 bool isDirect = false;
2810
2811 const TargetMachine &TM = getTargetMachine();
2812 const GlobalValue *GVal = nullptr;
2813 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2814 GVal = G->getGlobal();
2815 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2816
2817 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2818 bool isLocalARMFunc = false;
2819 auto PtrVt = getPointerTy(DAG.getDataLayout());
2820
2821 if (Subtarget->genLongCalls()) {
2822 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2823 "long-calls codegen is not position independent!");
2824 // Handle a global address or an external symbol. If it's not one of
2825 // those, the target's already in a register, so we don't need to do
2826 // anything extra.
2827 if (isa<GlobalAddressSDNode>(Callee)) {
2828 if (Subtarget->genExecuteOnly()) {
2829 if (Subtarget->useMovt())
2830 ++NumMovwMovt;
2831 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2832 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2833 } else {
2834 // Create a constant pool entry for the callee address
2835 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2836 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2837 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2838
2839 // Get the address of the callee into a register
2840 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2841 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2842 Callee = DAG.getLoad(
2843 PtrVt, dl, DAG.getEntryNode(), Addr,
2844 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2845 }
2846 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2847 const char *Sym = S->getSymbol();
2848
2849 if (Subtarget->genExecuteOnly()) {
2850 if (Subtarget->useMovt())
2851 ++NumMovwMovt;
2852 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2853 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2854 } else {
2855 // Create a constant pool entry for the callee address
2856 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2857 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2858 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2859
2860 // Get the address of the callee into a register
2861 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2862 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2863 Callee = DAG.getLoad(
2864 PtrVt, dl, DAG.getEntryNode(), Addr,
2865 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2866 }
2867 }
2868 } else if (isa<GlobalAddressSDNode>(Callee)) {
2869 if (!PreferIndirect) {
2870 isDirect = true;
2871 bool isDef = GVal->isStrongDefinitionForLinker();
2872
2873 // ARM call to a local ARM function is predicable.
2874 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2875 // tBX takes a register source operand.
2876 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2877 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2878 Callee = DAG.getNode(
2879 ARMISD::WrapperPIC, dl, PtrVt,
2880 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2881 Callee = DAG.getLoad(
2882 PtrVt, dl, DAG.getEntryNode(), Callee,
2883 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2884 MachineMemOperand::MODereferenceable |
2885 MachineMemOperand::MOInvariant);
2886 } else if (Subtarget->isTargetCOFF()) {
2887 assert(Subtarget->isTargetWindows() &&
2888 "Windows is the only supported COFF target");
2889 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2890 if (GVal->hasDLLImportStorageClass())
2891 TargetFlags = ARMII::MO_DLLIMPORT;
2892 else if (!TM.shouldAssumeDSOLocal(GVal))
2893 TargetFlags = ARMII::MO_COFFSTUB;
2894 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2895 TargetFlags);
2896 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2897 Callee =
2898 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2899 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2900 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2901 } else {
2902 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2903 }
2904 }
2905 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2906 isDirect = true;
2907 // tBX takes a register source operand.
2908 const char *Sym = S->getSymbol();
2909 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2910 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2911 ARMConstantPoolValue *CPV =
2912 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2913 ARMPCLabelIndex, 4);
2914 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2915 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2916 Callee = DAG.getLoad(
2917 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2918 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2919 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2920 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2921 } else {
2922 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2923 }
2924 }
2925
2926 if (isCmseNSCall) {
2927 assert(!isARMFunc && !isDirect &&
2928 "Cannot handle call to ARM function or direct call");
2929 if (NumBytes > 0) {
2931 "call to non-secure function would "
2932 "require passing arguments on stack",
2933 dl.getDebugLoc());
2934 DAG.getContext()->diagnose(Diag);
2935 }
2936 if (isStructRet) {
2939 "call to non-secure function would return value through pointer",
2940 dl.getDebugLoc());
2941 DAG.getContext()->diagnose(Diag);
2942 }
2943 }
2944
2945 // FIXME: handle tail calls differently.
2946 unsigned CallOpc;
2947 if (Subtarget->isThumb()) {
2948 if (GuardWithBTI)
2949 CallOpc = ARMISD::t2CALL_BTI;
2950 else if (isCmseNSCall)
2951 CallOpc = ARMISD::tSECALL;
2952 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2953 CallOpc = ARMISD::CALL_NOLINK;
2954 else
2955 CallOpc = ARMISD::CALL;
2956 } else {
2957 if (!isDirect && !Subtarget->hasV5TOps())
2958 CallOpc = ARMISD::CALL_NOLINK;
2959 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2960 // Emit regular call when code size is the priority
2961 !Subtarget->hasMinSize())
2962 // "mov lr, pc; b _foo" to avoid confusing the RSP
2963 CallOpc = ARMISD::CALL_NOLINK;
2964 else
2965 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2966 }
2967
2968 // We don't usually want to end the call-sequence here because we would tidy
2969 // the frame up *after* the call, however in the ABI-changing tail-call case
2970 // we've carefully laid out the parameters so that when sp is reset they'll be
2971 // in the correct location.
2972 if (isTailCall && !isSibCall) {
2973 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2974 InGlue = Chain.getValue(1);
2975 }
2976
2977 std::vector<SDValue> Ops;
2978 Ops.push_back(Chain);
2979 Ops.push_back(Callee);
2980
2981 if (isTailCall) {
2982 Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
2983 }
2984
2985 // Add argument registers to the end of the list so that they are known live
2986 // into the call.
2987 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2988 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2989 RegsToPass[i].second.getValueType()));
2990
2991 // Add a register mask operand representing the call-preserved registers.
2992 const uint32_t *Mask;
2993 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2994 if (isThisReturn) {
2995 // For 'this' returns, use the R0-preserving mask if applicable
2996 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2997 if (!Mask) {
2998 // Set isThisReturn to false if the calling convention is not one that
2999 // allows 'returned' to be modeled in this way, so LowerCallResult does
3000 // not try to pass 'this' straight through
3001 isThisReturn = false;
3002 Mask = ARI->getCallPreservedMask(MF, CallConv);
3003 }
3004 } else
3005 Mask = ARI->getCallPreservedMask(MF, CallConv);
3006
3007 assert(Mask && "Missing call preserved mask for calling convention");
3008 Ops.push_back(DAG.getRegisterMask(Mask));
3009
3010 if (InGlue.getNode())
3011 Ops.push_back(InGlue);
3012
3013 if (isTailCall) {
3014 MF.getFrameInfo().setHasTailCall();
3015 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
3016 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
3017 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
3018 return Ret;
3019 }
3020
3021 // Returns a chain and a flag for retval copy to use.
3022 Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
3023 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3024 InGlue = Chain.getValue(1);
3025 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
3026
3027 // If we're guaranteeing tail-calls will be honoured, the callee must
3028 // pop its own argument stack on return. But this call is *not* a tail call so
3029 // we need to undo that after it returns to restore the status-quo.
3030 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
3031 uint64_t CalleePopBytes =
3032 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
3033
3034 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
3035 if (!Ins.empty())
3036 InGlue = Chain.getValue(1);
3037
3038 // Handle result values, copying them out of physregs into vregs that we
3039 // return.
3040 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
3041 InVals, isThisReturn,
3042 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
3043}
3044
3045/// HandleByVal - Every parameter *after* a byval parameter is passed
3046/// on the stack. Remember the next parameter register to allocate,
3047/// and then confiscate the rest of the parameter registers to insure
3048/// this.
3049void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
3050 Align Alignment) const {
3051 // Byval (as with any stack) slots are always at least 4 byte aligned.
3052 Alignment = std::max(Alignment, Align(4));
3053
3054 MCRegister Reg = State->AllocateReg(GPRArgRegs);
3055 if (!Reg)
3056 return;
3057
3058 unsigned AlignInRegs = Alignment.value() / 4;
3059 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
3060 for (unsigned i = 0; i < Waste; ++i)
3061 Reg = State->AllocateReg(GPRArgRegs);
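// For example, an 8-byte-aligned byval arriving when r1 is the next free
// register gives AlignInRegs == 2 and Waste == 1, so r1 is wasted as padding
// and the byval starts in r2.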
3062
3063 if (!Reg)
3064 return;
3065
3066 unsigned Excess = 4 * (ARM::R4 - Reg);
3067
3068 // Special case when NSAA != SP and the parameter size is greater than the
3069 // size of all remaining GPR regs. In that case we can't split the parameter,
3070 // we must send it entirely to the stack. We also must set NCRN to R4, so all
3071 // remaining registers are wasted.
3072 const unsigned NSAAOffset = State->getStackSize();
3073 if (NSAAOffset != 0 && Size > Excess) {
3074 while (State->AllocateReg(GPRArgRegs))
3075 ;
3076 return;
3077 }
3078
3079 // The first register for the byval parameter is the first register that
3080 // wasn't allocated before this method call, so it is "reg".
3081 // If the parameter is small enough to be saved in the range [reg, r4), then
3082 // the end (one past the last) register is reg + param-size-in-regs;
3083 // otherwise the parameter is split between registers and the stack, and the
3084 // end register is r4 in this case.
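// For example, a 12-byte byval starting in r2 gets the range [r2, r4): 8 bytes
// travel in registers and, after the adjustment below, 4 bytes stay in memory.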
3085 unsigned ByValRegBegin = Reg;
3086 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
3087 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
3088 // Note: the first register was already allocated at the beginning of this
3089 // function, so allocate the remaining registers we need.
3090 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
3091 State->AllocateReg(GPRArgRegs);
3092 // A byval parameter that is split between registers and memory needs its
3093 // size truncated here.
3094 // In the case where the entire structure fits in registers, we set the
3095 // size in memory to zero.
3096 Size = std::max<int>(Size - Excess, 0);
3097}
3098
3099/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3100/// for tail call optimization. Targets which want to do tail call
3101/// optimization should implement this function. Note that this function also
3102/// processes musttail calls, so when this function returns false on a valid
3103/// musttail call, a fatal backend error occurs.
3104bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3106 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3107 CallingConv::ID CalleeCC = CLI.CallConv;
3108 SDValue Callee = CLI.Callee;
3109 bool isVarArg = CLI.IsVarArg;
3110 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3111 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3112 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3113 const SelectionDAG &DAG = CLI.DAG;
3114 MachineFunction &MF = DAG.getMachineFunction();
3115 const Function &CallerF = MF.getFunction();
3116 CallingConv::ID CallerCC = CallerF.getCallingConv();
3117
3118 assert(Subtarget->supportsTailCall());
3119
3120 // Indirect tail-calls require a register to hold the target address. That
3121 // register must be:
3122 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
3123 // * Not callee-saved, so must be one of r0-r3 or r12.
3124 // * Not used to hold an argument to the tail-called function, which might be
3125 // in r0-r3.
3126 // * Not used to hold the return address authentication code, which is in r12
3127 // if enabled.
3128 // Sometimes, no register matches all of these conditions, so we can't do a
3129 // tail-call.
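// For example, a Thumb1 indirect tail-call whose arguments occupy r0-r3 is
// rejected here, since r12 is not a usable candidate on Thumb1 and no other
// register remains to hold the target address.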
3130 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3131 SmallSet<MCPhysReg, 5> AddressRegisters;
3132 for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3133 AddressRegisters.insert(R);
3134 if (!(Subtarget->isThumb1Only() ||
3135 MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
3136 AddressRegisters.insert(ARM::R12);
3137 for (const CCValAssign &AL : ArgLocs)
3138 if (AL.isRegLoc())
3139 AddressRegisters.erase(AL.getLocReg());
3140 if (AddressRegisters.empty()) {
3141 LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
3142 return false;
3143 }
3144 }
3145
3146 // Look for obvious safe cases to perform tail call optimization that do not
3147 // require ABI changes. This is what gcc calls sibcall.
3148
3149 // Exception-handling functions need a special set of instructions to indicate
3150 // a return to the hardware. Tail-calling another function would probably
3151 // break this.
3152 if (CallerF.hasFnAttribute("interrupt")) {
3153 LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
3154 return false;
3155 }
3156
3157 if (canGuaranteeTCO(CalleeCC,
3158 getTargetMachine().Options.GuaranteedTailCallOpt)) {
3159 LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
3160 << " (guaranteed tail-call CC)\n");
3161 return CalleeCC == CallerCC;
3162 }
3163
3164 // Also avoid sibcall optimization if either caller or callee uses struct
3165 // return semantics.
3166 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3167 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3168 if (isCalleeStructRet != isCallerStructRet) {
3169 LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
3170 return false;
3171 }
3172
3173 // Externally-defined functions with weak linkage should not be
3174 // tail-called on ARM when the OS does not support dynamic
3175 // pre-emption of symbols, as the AAELF spec requires normal calls
3176 // to undefined weak functions to be replaced with a NOP or jump to the
3177 // next instruction. The behaviour of branch instructions in this
3178 // situation (as used for tail calls) is implementation-defined, so we
3179 // cannot rely on the linker replacing the tail call with a return.
3180 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3181 const GlobalValue *GV = G->getGlobal();
3182 const Triple &TT = getTargetMachine().getTargetTriple();
3183 if (GV->hasExternalWeakLinkage() &&
3184 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3185 TT.isOSBinFormatMachO())) {
3186 LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
3187 return false;
3188 }
3189 }
3190
3191 // Check that the call results are passed in the same way.
3192 LLVMContext &C = *DAG.getContext();
3193 if (!CCState::resultsCompatible(
3194 getEffectiveCallingConv(CalleeCC, isVarArg),
3195 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3196 CCAssignFnForReturn(CalleeCC, isVarArg),
3197 CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3198 LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
3199 return false;
3200 }
3201 // The callee has to preserve all registers the caller needs to preserve.
3202 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3203 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3204 if (CalleeCC != CallerCC) {
3205 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3206 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3207 LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
3208 return false;
3209 }
3210 }
3211
3212 // If Caller's vararg argument has been split between registers and stack, do
3213 // not perform tail call, since part of the argument is in caller's local
3214 // frame.
3215 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3216 if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
3217 LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
3218 return false;
3219 }
3220
3221 // If the callee takes no arguments then go on to check the results of the
3222 // call.
3223 const MachineRegisterInfo &MRI = MF.getRegInfo();
3224 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3225 LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
3226 return false;
3227 }
3228
3229 // If the stack arguments for this call do not fit into our own save area then
3230 // the call cannot be made tail.
3231 if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
3232 return false;
3233
3234 LLVM_DEBUG(dbgs() << "true\n");
3235 return true;
3236}
3237
3238bool
3239ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3240 MachineFunction &MF, bool isVarArg,
3241 const SmallVectorImpl<ISD::OutputArg> &Outs,
3242 LLVMContext &Context) const {
3243 SmallVector<CCValAssign, 16> RVLocs;
3244 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3245 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3246}
3247
3248 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3249 const SDLoc &DL, SelectionDAG &DAG) {
3250 const MachineFunction &MF = DAG.getMachineFunction();
3251 const Function &F = MF.getFunction();
3252
3253 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3254
3255 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3256 // version of the "preferred return address". These offsets affect the return
3257 // instruction if this is a return from PL1 without hypervisor extensions.
3258 // IRQ/FIQ: +4 "subs pc, lr, #4"
3259 // SWI: 0 "subs pc, lr, #0"
3260 // ABORT: +4 "subs pc, lr, #4"
3261 // UNDEF: +4/+2 "subs pc, lr, #0"
3262 // UNDEF varies depending on where the exception came from ARM or Thumb
3263 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3264
3265 int64_t LROffset;
3266 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3267 IntKind == "ABORT")
3268 LROffset = 4;
3269 else if (IntKind == "SWI" || IntKind == "UNDEF")
3270 LROffset = 0;
3271 else
3272 report_fatal_error("Unsupported interrupt attribute. If present, value "
3273 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3274
3275 RetOps.insert(RetOps.begin() + 1,
3276 DAG.getConstant(LROffset, DL, MVT::i32, false));
3277
3278 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3279}
3280
3281SDValue
3282ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3283 bool isVarArg,
3285 const SmallVectorImpl<SDValue> &OutVals,
3286 const SDLoc &dl, SelectionDAG &DAG) const {
3287 // CCValAssign - represent the assignment of the return value to a location.
3289
3290 // CCState - Info about the registers and stack slots.
3291 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3292 *DAG.getContext());
3293
3294 // Analyze outgoing return values.
3295 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3296
3297 SDValue Glue;
3298 SmallVector<SDValue, 4> RetOps;
3299 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3300 bool isLittleEndian = Subtarget->isLittle();
3301
3302 MachineFunction &MF = DAG.getMachineFunction();
3303 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3304 AFI->setReturnRegsCount(RVLocs.size());
3305
3306 // Report error if cmse entry function returns structure through first ptr arg.
3307 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3308 // Note: using an empty SDLoc(), as the first line of the function is a
3309 // better place to report than the last line.
3312 "secure entry function would return value through pointer",
3313 SDLoc().getDebugLoc());
3314 DAG.getContext()->diagnose(Diag);
3315 }
3316
3317 // Copy the result values into the output registers.
3318 for (unsigned i = 0, realRVLocIdx = 0;
3319 i != RVLocs.size();
3320 ++i, ++realRVLocIdx) {
3321 CCValAssign &VA = RVLocs[i];
3322 assert(VA.isRegLoc() && "Can only return in registers!");
3323
3324 SDValue Arg = OutVals[realRVLocIdx];
3325 bool ReturnF16 = false;
3326
3327 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3328 // Half-precision return values can be returned like this:
3329 //
3330 // t11 f16 = fadd ...
3331 // t12: i16 = bitcast t11
3332 // t13: i32 = zero_extend t12
3333 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3334 //
3335 // to avoid code generation for bitcasts, we simply set Arg to the node
3336 // that produces the f16 value, t11 in this case.
3337 //
3338 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3339 SDValue ZE = Arg.getOperand(0);
3340 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3341 SDValue BC = ZE.getOperand(0);
3342 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3343 Arg = BC.getOperand(0);
3344 ReturnF16 = true;
3345 }
3346 }
3347 }
3348 }
3349
3350 switch (VA.getLocInfo()) {
3351 default: llvm_unreachable("Unknown loc info!");
3352 case CCValAssign::Full: break;
3353 case CCValAssign::BCvt:
3354 if (!ReturnF16)
3355 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3356 break;
3357 }
3358
3359 // Mask f16 arguments if this is a CMSE nonsecure entry.
3360 auto RetVT = Outs[realRVLocIdx].ArgVT;
3361 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3362 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3363 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3364 } else {
3365 auto LocBits = VA.getLocVT().getSizeInBits();
3366 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3367 SDValue Mask =
3368 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3369 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3370 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3371 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3372 }
3373 }
3374
3375 if (VA.needsCustom() &&
3376 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3377 if (VA.getLocVT() == MVT::v2f64) {
3378 // Extract the first half and return it in two registers.
3379 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3380 DAG.getConstant(0, dl, MVT::i32));
3381 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3382 DAG.getVTList(MVT::i32, MVT::i32), Half);
3383
3384 Chain =
3385 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3386 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3387 Glue = Chain.getValue(1);
3388 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3389 VA = RVLocs[++i]; // skip ahead to next loc
3390 Chain =
3391 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3392 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3393 Glue = Chain.getValue(1);
3394 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3395 VA = RVLocs[++i]; // skip ahead to next loc
3396
3397 // Extract the 2nd half and fall through to handle it as an f64 value.
3398 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3399 DAG.getConstant(1, dl, MVT::i32));
3400 }
3401 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3402 // available.
3403 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3404 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3405 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3406 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3407 Glue = Chain.getValue(1);
3408 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3409 VA = RVLocs[++i]; // skip ahead to next loc
3410 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3411 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3412 } else
3413 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3414
3415 // Guarantee that all emitted copies are
3416 // stuck together, avoiding something bad.
3417 Glue = Chain.getValue(1);
3418 RetOps.push_back(DAG.getRegister(
3419 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3420 }
3421 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3422 const MCPhysReg *I =
3423 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3424 if (I) {
3425 for (; *I; ++I) {
3426 if (ARM::GPRRegClass.contains(*I))
3427 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3428 else if (ARM::DPRRegClass.contains(*I))
3429 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3430 else
3431 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3432 }
3433 }
3434
3435 // Update chain and glue.
3436 RetOps[0] = Chain;
3437 if (Glue.getNode())
3438 RetOps.push_back(Glue);
3439
3440 // CPUs which aren't M-class use a special sequence to return from
3441 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3442 // though we use "subs pc, lr, #N").
3443 //
3444 // M-class CPUs actually use a normal return sequence with a special
3445 // (hardware-provided) value in LR, so the normal code path works.
3446 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3447 !Subtarget->isMClass()) {
3448 if (Subtarget->isThumb1Only())
3449 report_fatal_error("interrupt attribute is not supported in Thumb1");
3450 return LowerInterruptReturn(RetOps, dl, DAG);
3451 }
3452
3453 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3454 ARMISD::RET_GLUE;
3455 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3456}
3457
3458bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3459 if (N->getNumValues() != 1)
3460 return false;
3461 if (!N->hasNUsesOfValue(1, 0))
3462 return false;
3463
3464 SDValue TCChain = Chain;
3465 SDNode *Copy = *N->user_begin();
3466 if (Copy->getOpcode() == ISD::CopyToReg) {
3467 // If the copy has a glue operand, we conservatively assume it isn't safe to
3468 // perform a tail call.
3469 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3470 return false;
3471 TCChain = Copy->getOperand(0);
3472 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3473 SDNode *VMov = Copy;
3474 // f64 returned in a pair of GPRs.
3475 SmallPtrSet<SDNode*, 2> Copies;
3476 for (SDNode *U : VMov->users()) {
3477 if (U->getOpcode() != ISD::CopyToReg)
3478 return false;
3479 Copies.insert(U);
3480 }
3481 if (Copies.size() > 2)
3482 return false;
3483
3484 for (SDNode *U : VMov->users()) {
3485 SDValue UseChain = U->getOperand(0);
3486 if (Copies.count(UseChain.getNode()))
3487 // Second CopyToReg
3488 Copy = U;
3489 else {
3490 // We are at the top of this chain.
3491 // If the copy has a glue operand, we conservatively assume it
3492 // isn't safe to perform a tail call.
3493 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3494 return false;
3495 // First CopyToReg
3496 TCChain = UseChain;
3497 }
3498 }
3499 } else if (Copy->getOpcode() == ISD::BITCAST) {
3500 // f32 returned in a single GPR.
3501 if (!Copy->hasOneUse())
3502 return false;
3503 Copy = *Copy->user_begin();
3504 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3505 return false;
3506 // If the copy has a glue operand, we conservatively assume it isn't safe to
3507 // perform a tail call.
3508 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3509 return false;
3510 TCChain = Copy->getOperand(0);
3511 } else {
3512 return false;
3513 }
3514
3515 bool HasRet = false;
3516 for (const SDNode *U : Copy->users()) {
3517 if (U->getOpcode() != ARMISD::RET_GLUE &&
3518 U->getOpcode() != ARMISD::INTRET_GLUE)
3519 return false;
3520 HasRet = true;
3521 }
3522
3523 if (!HasRet)
3524 return false;
3525
3526 Chain = TCChain;
3527 return true;
3528}
3529
3530bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3531 if (!Subtarget->supportsTailCall())
3532 return false;
3533
3534 if (!CI->isTailCall())
3535 return false;
3536
3537 return true;
3538}
3539
3540 // Writing a 64-bit value, so we need to split it into two 32-bit values
3541 // first and pass the low and high parts through.
3542 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3543 SDLoc DL(Op);
3544 SDValue WriteValue = Op->getOperand(2);
3545
3546 // This function is only supposed to be called for i64 type argument.
3547 assert(WriteValue.getValueType() == MVT::i64
3548 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3549
3550 SDValue Lo, Hi;
3551 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3552 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3553 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3554}
3555
3556// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3557// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3558// one of the above mentioned nodes. It has to be wrapped because otherwise
3559// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3560// be used to form addressing mode. These wrapped nodes will be selected
3561// into MOVi.
3562SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3563 SelectionDAG &DAG) const {
3564 EVT PtrVT = Op.getValueType();
3565 // FIXME there is no actual debug info here
3566 SDLoc dl(Op);
3567 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3568 SDValue Res;
3569
3570 // When generating execute-only code Constant Pools must be promoted to the
3571 // global data section. It's a bit ugly that we can't share them across basic
3572 // blocks, but this way we guarantee that execute-only behaves correctly with
3573 // position-independent addressing modes.
3574 if (Subtarget->genExecuteOnly()) {
3575 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3576 auto T = const_cast<Type*>(CP->getType());
3577 auto C = const_cast<Constant*>(CP->getConstVal());
3578 auto M = const_cast<Module*>(DAG.getMachineFunction().
3579 getFunction().getParent());
3580 auto GV = new GlobalVariable(
3581 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3582 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3583 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3584 Twine(AFI->createPICLabelUId())
3585 );
3586 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3587 dl, PtrVT);
3588 return LowerGlobalAddress(GA, DAG);
3589 }
3590
3591 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3592 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3593 Align CPAlign = CP->getAlign();
3594 if (Subtarget->isThumb1Only())
3595 CPAlign = std::max(CPAlign, Align(4));
3596 if (CP->isMachineConstantPoolEntry())
3597 Res =
3598 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3599 else
3600 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3601 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3602}
3603
3604 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3605 // If we don't have a 32-bit pc-relative branch instruction then the jump
3606 // table consists of block addresses. Usually this is inline, but for
3607 // execute-only it must be placed out-of-line.
3608 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3609 return MachineJumpTableInfo::EK_BlockAddress;
3610 return MachineJumpTableInfo::EK_Inline;
3611}
3612
3613SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3614 SelectionDAG &DAG) const {
3615 MachineFunction &MF = DAG.getMachineFunction();
3616 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3617 unsigned ARMPCLabelIndex = 0;
3618 SDLoc DL(Op);
3619 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3620 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3621 SDValue CPAddr;
3622 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3623 if (!IsPositionIndependent) {
3624 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3625 } else {
3626 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3627 ARMPCLabelIndex = AFI->createPICLabelUId();
3628 ARMConstantPoolValue *CPV =
3629 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3630 ARMCP::CPBlockAddress, PCAdj);
3631 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3632 }
3633 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3634 SDValue Result = DAG.getLoad(
3635 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3636 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3637 if (!IsPositionIndependent)
3638 return Result;
3639 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3640 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3641}
3642
3643/// Convert a TLS address reference into the correct sequence of loads
3644/// and calls to compute the variable's address for Darwin, and return an
3645/// SDValue containing the final node.
3646
3647/// Darwin only has one TLS scheme which must be capable of dealing with the
3648/// fully general situation, in the worst case. This means:
3649/// + "extern __thread" declaration.
3650/// + Defined in a possibly unknown dynamic library.
3651///
3652/// The general system is that each __thread variable has a [3 x i32] descriptor
3653/// which contains information used by the runtime to calculate the address. The
3654/// only part of this the compiler needs to know about is the first word, which
3655/// contains a function pointer that must be called with the address of the
3656/// entire descriptor in "r0".
3657///
3658/// Since this descriptor may be in a different unit, in general access must
3659/// proceed along the usual ARM rules. A common sequence to produce is:
3660///
3661/// movw rT1, :lower16:_var$non_lazy_ptr
3662/// movt rT1, :upper16:_var$non_lazy_ptr
3663/// ldr r0, [rT1]
3664/// ldr rT2, [r0]
3665/// blx rT2
3666/// [...address now in r0...]
3667SDValue
3668ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3669 SelectionDAG &DAG) const {
3670 assert(Subtarget->isTargetDarwin() &&
3671 "This function expects a Darwin target");
3672 SDLoc DL(Op);
3673
3674 // The first step is to get the address of the actual global symbol. This is where
3675 // the TLS descriptor lives.
3676 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3677
3678 // The first entry in the descriptor is a function pointer that we must call
3679 // to obtain the address of the variable.
3680 SDValue Chain = DAG.getEntryNode();
3681 SDValue FuncTLVGet = DAG.getLoad(
3682 MVT::i32, DL, Chain, DescAddr,
3683 MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3684 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3685 MachineMemOperand::MOInvariant);
3686 Chain = FuncTLVGet.getValue(1);
3687
3689 MachineFrameInfo &MFI = F.getFrameInfo();
3690 MFI.setAdjustsStack(true);
3691
3692 // TLS calls preserve all registers except those that absolutely must be
3693 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3694 // silly).
3695 auto TRI =
3696 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3697 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3698 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3699
3700 // Finally, we can make the call. This is just a degenerate version of a
3701 // normal ARM call node: r0 takes the address of the descriptor, and
3702 // returns the address of the variable in this thread.
3703 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3704 Chain =
3705 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3706 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3707 DAG.getRegisterMask(Mask), Chain.getValue(1));
3708 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3709}
3710
3711SDValue
3712ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3713 SelectionDAG &DAG) const {
3714 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3715
3716 SDValue Chain = DAG.getEntryNode();
3717 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3718 SDLoc DL(Op);
3719
3720 // Load the current TEB (thread environment block)
3721 SDValue Ops[] = {Chain,
3722 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3723 DAG.getTargetConstant(15, DL, MVT::i32),
3724 DAG.getTargetConstant(0, DL, MVT::i32),
3725 DAG.getTargetConstant(13, DL, MVT::i32),
3726 DAG.getTargetConstant(0, DL, MVT::i32),
3727 DAG.getTargetConstant(2, DL, MVT::i32)};
3728 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3729 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3730
3731 SDValue TEB = CurrentTEB.getValue(0);
3732 Chain = CurrentTEB.getValue(1);
3733
3734 // Load the ThreadLocalStoragePointer from the TEB
3735 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3736 SDValue TLSArray =
3737 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3738 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3739
3740 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3741 // offset into the TLSArray.
3742
3743 // Load the TLS index from the C runtime
3744 SDValue TLSIndex =
3745 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3746 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3747 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3748
3749 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3750 DAG.getConstant(2, DL, MVT::i32));
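// The shift by 2 scales _tls_index by 4: each slot in the TLS array is a
// 4-byte pointer, so Slot is the byte offset of this module's entry.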
3751 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3752 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3753 MachinePointerInfo());
3754
3755 // Get the offset of the start of the .tls section (section base)
3756 const auto *GA = cast<GlobalAddressSDNode>(Op);
3757 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3758 SDValue Offset = DAG.getLoad(
3759 PtrVT, DL, Chain,
3760 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3761 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3762 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3763
3764 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3765}
3766
3767// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3768SDValue
3769ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3770 SelectionDAG &DAG) const {
3771 SDLoc dl(GA);
3772 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3773 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3774 MachineFunction &MF = DAG.getMachineFunction();
3775 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3776 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3777 ARMConstantPoolValue *CPV =
3778 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3779 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3780 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3781 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3782 Argument = DAG.getLoad(
3783 PtrVT, dl, DAG.getEntryNode(), Argument,
3784 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3785 SDValue Chain = Argument.getValue(1);
3786
3787 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3788 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3789
3790 // call __tls_get_addr.
3791 ArgListTy Args;
3792 ArgListEntry Entry;
3793 Entry.Node = Argument;
3794 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3795 Args.push_back(Entry);
3796
3797 // FIXME: is there useful debug info available here?
3798 TargetLowering::CallLoweringInfo CLI(DAG);
3799 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3800 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3801 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3802
3803 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3804 return CallResult.first;
3805}
3806
3807// Lower ISD::GlobalTLSAddress using the "initial exec" or
3808// "local exec" model.
3809SDValue
3810ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3811 SelectionDAG &DAG,
3812 TLSModel::Model model) const {
3813 const GlobalValue *GV = GA->getGlobal();
3814 SDLoc dl(GA);
3816 SDValue Chain = DAG.getEntryNode();
3817 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3818 // Get the Thread Pointer
3820
3821 if (model == TLSModel::InitialExec) {
3824 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3825 // Initial exec model.
3826 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3828 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3830 true);
3831 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3832 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3833 Offset = DAG.getLoad(
3834 PtrVT, dl, Chain, Offset,
3836 Chain = Offset.getValue(1);
3837
3838 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3839 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3840
3841 Offset = DAG.getLoad(
3842 PtrVT, dl, Chain, Offset,
3844 } else {
3845 // local exec model
3846 assert(model == TLSModel::LocalExec);
3849 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3850 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3851 Offset = DAG.getLoad(
3852 PtrVT, dl, Chain, Offset,
3854 }
3855
3856 // The address of the thread local variable is the add of the thread
3857 // pointer with the offset of the variable.
3858 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3859}
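// Illustrative summary (not part of the upstream file): both exec models above
// reduce to
//   address = ThreadPointer + Offset
// For initial-exec the offset is read from a GOT slot that the dynamic linker
// fills in (hence the extra load and the PIC_ADD); for local-exec it is a
// link-time TPOFF constant loaded directly from the constant pool.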
3860
3861SDValue
3862ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3863 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3864 if (DAG.getTarget().useEmulatedTLS())
3865 return LowerToTLSEmulatedModel(GA, DAG);
3866
3867 if (Subtarget->isTargetDarwin())
3868 return LowerGlobalTLSAddressDarwin(Op, DAG);
3869
3870 if (Subtarget->isTargetWindows())
3871 return LowerGlobalTLSAddressWindows(Op, DAG);
3872
3873 // TODO: implement the "local dynamic" model
3874 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3876
3877 switch (model) {
3880 return LowerToTLSGeneralDynamicModel(GA, DAG);
3883 return LowerToTLSExecModels(GA, DAG, model);
3884 }
3885 llvm_unreachable("bogus TLS model");
3886}
3887
3888/// Return true if all users of V are within function F, looking through
3889/// ConstantExprs.
3890static bool allUsersAreInFunction(const Value *V, const Function *F) {
3891 SmallVector<const User*,4> Worklist(V->users());
3892 while (!Worklist.empty()) {
3893 auto *U = Worklist.pop_back_val();
3894 if (isa<ConstantExpr>(U)) {
3895 append_range(Worklist, U->users());
3896 continue;
3897 }
3898
3899 auto *I = dyn_cast<Instruction>(U);
3900 if (!I || I->getParent()->getParent() != F)
3901 return false;
3902 }
3903 return true;
3904}
3905
3907 const GlobalValue *GV, SelectionDAG &DAG,
3908 EVT PtrVT, const SDLoc &dl) {
3909 // If we're creating a pool entry for a constant global with unnamed address,
3910 // and the global is small enough, we can emit it inline into the constant pool
3911 // to save ourselves an indirection.
3912 //
3913 // This is a win if the constant is only used in one function (so it doesn't
3914 // need to be duplicated) or duplicating the constant wouldn't increase code
3915 // size (implying the constant is no larger than 4 bytes).
3916 const Function &F = DAG.getMachineFunction().getFunction();
3917
3918 // We rely on this decision to inline being idempotent and unrelated to the
3919 // use-site. We know that if we inline a variable at one use site, we'll
3920 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3921 // doesn't know about this optimization, so bail out if it's enabled; otherwise
3922 // we could decide to inline here (and thus never emit the GV) while
3923 // fast-isel-generated code still requires the GV.
3926 return SDValue();
3927
3928 auto *GVar = dyn_cast<GlobalVariable>(GV);
3929 if (!GVar || !GVar->hasInitializer() ||
3930 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3931 !GVar->hasLocalLinkage())
3932 return SDValue();
3933
3934 // If we inline a value that contains relocations, we move the relocations
3935 // from .data to .text. This is not allowed in position-independent code.
3936 auto *Init = GVar->getInitializer();
3937 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3938 Init->needsDynamicRelocation())
3939 return SDValue();
3940
3941 // The constant islands pass can only really deal with alignment requests
3942 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3943 // any type wanting greater alignment requirements than 4 bytes. We also
3944 // can only promote constants that are multiples of 4 bytes in size or
3945 // are paddable to a multiple of 4. Currently we only try and pad constants
3946 // that are strings for simplicity.
3947 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3948 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3949 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3950 unsigned RequiredPadding = 4 - (Size % 4);
3951 bool PaddingPossible =
3952 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3953 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3954 Size == 0)
3955 return SDValue();
3956
3957 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3960
3961 // We can't bloat the constant pool too much, else the ConstantIslands pass
3962 // may fail to converge. If we haven't promoted this global yet (it may have
3963 // multiple uses), and promoting it would increase the constant pool size (Sz
3964 // > 4), ensure we have space to do so up to MaxTotal.
3965 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3966 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3968 return SDValue();
3969
3970 // This is only valid if all users are in a single function; we can't clone
3971 // the constant in general. The LLVM IR unnamed_addr allows merging
3972 // constants, but not cloning them.
3973 //
3974 // We could potentially allow cloning if we could prove all uses of the
3975 // constant in the current function don't care about the address, like
3976 // printf format strings. But that isn't implemented for now.
3977 if (!allUsersAreInFunction(GVar, &F))
3978 return SDValue();
3979
3980 // We're going to inline this global. Pad it out if needed.
3981 if (RequiredPadding != 4) {
3982 StringRef S = CDAInit->getAsString();
3983
3985 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3986 while (RequiredPadding--)
3987 V.push_back(0);
3989 }
3990
3991 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3992 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3993 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3996 PaddedSize - 4);
3997 }
3998 ++NumConstpoolPromoted;
3999 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4000}
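// Illustrative sketch (not part of the upstream file): the size/padding
// bookkeeping used by promoteToConstantPool above, in isolation. The helper
// name is made up; string constants are padded up to a multiple of 4 bytes so
// the constant islands pass never has to pad them itself.
static inline unsigned paddedConstpoolSizeSketch(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4); // 4 means "already a multiple of 4"
  return Size + (RequiredPadding == 4 ? 0 : RequiredPadding);
}
// e.g. paddedConstpoolSizeSketch(5) == 8 and paddedConstpoolSizeSketch(8) == 8;
// only the increase beyond 4 bytes (PaddedSize - 4) is charged against the
// ConstpoolPromotionMaxTotal budget above.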
4001
4003 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
4004 if (!(GV = GA->getAliaseeObject()))
4005 return false;
4006 if (const auto *V = dyn_cast<GlobalVariable>(GV))
4007 return V->isConstant();
4008 return isa<Function>(GV);
4009}
4010
4011SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
4012 SelectionDAG &DAG) const {
4013 switch (Subtarget->getTargetTriple().getObjectFormat()) {
4014 default: llvm_unreachable("unknown object format");
4015 case Triple::COFF:
4016 return LowerGlobalAddressWindows(Op, DAG);
4017 case Triple::ELF:
4018 return LowerGlobalAddressELF(Op, DAG);
4019 case Triple::MachO:
4020 return LowerGlobalAddressDarwin(Op, DAG);
4021 }
4022}
4023
4024SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
4025 SelectionDAG &DAG) const {
4026 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4027 SDLoc dl(Op);
4028 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4029 bool IsRO = isReadOnly(GV);
4030
4031 // promoteToConstantPool only if not generating XO text section
4032 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
4033 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
4034 return V;
4035
4036 if (isPositionIndependent()) {
4038 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
4039 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
4040 if (!GV->isDSOLocal())
4041 Result =
4042 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4044 return Result;
4045 } else if (Subtarget->isROPI() && IsRO) {
4046 // PC-relative.
4047 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
4048 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
4049 return Result;
4050 } else if (Subtarget->isRWPI() && !IsRO) {
4051 // SB-relative.
4052 SDValue RelAddr;
4053 if (Subtarget->useMovt()) {
4054 ++NumMovwMovt;
4055 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
4056 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
4057 } else { // use literal pool for address constant
4060 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4061 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4062 RelAddr = DAG.getLoad(
4063 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4065 }
4066 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
4067 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
4068 return Result;
4069 }
4070
4071 // If we have T2 ops, we can materialize the address directly via a movw/movt
4072 // pair. This is always cheaper. If we need to generate Execute Only code and we
4073 // only have Thumb1 available, we can't use a constant pool and are forced to
4074 // use immediate relocations.
4075 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
4076 if (Subtarget->useMovt())
4077 ++NumMovwMovt;
4078 // FIXME: Once remat is capable of dealing with instructions with register
4079 // operands, expand this into two nodes.
4080 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
4081 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
4082 } else {
4083 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
4084 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4085 return DAG.getLoad(
4086 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4088 }
4089}
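// Illustrative summary (not part of the upstream file) of the ELF cases above:
//   small DSO-local constant   -> promoted straight into the constant pool
//   position independent (PIC) -> WrapperPIC(GV), plus a GOT load if not DSO-local
//   ROPI and read-only GV      -> PC-relative WrapperPIC(GV)
//   RWPI and writable GV       -> R9 (static base) + SB-relative offset
//   movt available / XO code   -> address materialized with immediates
//   otherwise                  -> address loaded from a constant-pool literal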
4090
4091SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
4092 SelectionDAG &DAG) const {
4093 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4094 "ROPI/RWPI not currently supported for Darwin");
4095 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4096 SDLoc dl(Op);
4097 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4098
4099 if (Subtarget->useMovt())
4100 ++NumMovwMovt;
4101
4102 // FIXME: Once remat is capable of dealing with instructions with register
4103 // operands, expand this into multiple nodes
4104 unsigned Wrapper =
4106
4107 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4108 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4109
4110 if (Subtarget->isGVIndirectSymbol(GV))
4111 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4113 return Result;
4114}
4115
4116SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4117 SelectionDAG &DAG) const {
4118 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4119 assert(Subtarget->useMovt() &&
4120 "Windows on ARM expects to use movw/movt");
4121 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4122 "ROPI/RWPI not currently supported for Windows");
4123
4125 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4126 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4127 if (GV->hasDLLImportStorageClass())
4128 TargetFlags = ARMII::MO_DLLIMPORT;
4129 else if (!TM.shouldAssumeDSOLocal(GV))
4130 TargetFlags = ARMII::MO_COFFSTUB;
4131 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4133 SDLoc DL(Op);
4134
4135 ++NumMovwMovt;
4136
4137 // FIXME: Once remat is capable of dealing with instructions with register
4138 // operands, expand this into two nodes.
4139 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4140 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4141 TargetFlags));
4142 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4143 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4145 return Result;
4146}
4147
4148SDValue
4149ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4150 SDLoc dl(Op);
4151 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4152 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4153 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4154 Op.getOperand(1), Val);
4155}
4156
4157SDValue
4158ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4159 SDLoc dl(Op);
4160 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4161 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4162}
4163
4164SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4165 SelectionDAG &DAG) const {
4166 SDLoc dl(Op);
4167 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4168 Op.getOperand(0));
4169}
4170
4171SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4172 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4173 unsigned IntNo =
4174 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4175 switch (IntNo) {
4176 default:
4177 return SDValue(); // Don't custom lower most intrinsics.
4178 case Intrinsic::arm_gnu_eabi_mcount: {
4180 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4181 SDLoc dl(Op);
4182 SDValue Chain = Op.getOperand(0);
4183 // call "\01__gnu_mcount_nc"
4184 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4185 const uint32_t *Mask =
4187 assert(Mask && "Missing call preserved mask for calling convention");
4188 // Mark LR an implicit live-in.
4189 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4190 SDValue ReturnAddress =
4191 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4192 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4193 SDValue Callee =
4194 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4196 if (Subtarget->isThumb())
4197 return SDValue(
4198 DAG.getMachineNode(
4199 ARM::tBL_PUSHLR, dl, ResultTys,
4200 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4201 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4202 0);
4203 return SDValue(
4204 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4205 {ReturnAddress, Callee, RegisterMask, Chain}),
4206 0);
4207 }
4208 }
4209}
4210
4211SDValue
4212ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4213 const ARMSubtarget *Subtarget) const {
4214 unsigned IntNo = Op.getConstantOperandVal(0);
4215 SDLoc dl(Op);
4216 switch (IntNo) {
4217 default: return SDValue(); // Don't custom lower most intrinsics.
4218 case Intrinsic::thread_pointer: {
4219 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4220 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4221 }
4222 case Intrinsic::arm_cls: {
4223 const SDValue &Operand = Op.getOperand(1);
4224 const EVT VTy = Op.getValueType();
4225 SDValue SRA =
4226 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4227 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4228 SDValue SHL =
4229 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4230 SDValue OR =
4231 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4232 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4233 return Result;
4234 }
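  // Reference model of the expansion above (illustrative, not part of the
  // upstream file): for a 32-bit signed x,
  //   cls(x) == ctlz( (((x >> 31) ^ x) << 1) | 1 )   // >> is an arithmetic shift
  // e.g. cls(0) == 31, cls(-1) == 31, cls(0x0000FFFF) == 15.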
4235 case Intrinsic::arm_cls64: {
4236 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4237 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4238 const SDValue &Operand = Op.getOperand(1);
4239 const EVT VTy = Op.getValueType();
4240 SDValue Lo, Hi;
4241 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4242 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4243 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4244 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4245 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4246 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4247 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4248 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4249 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4250 SDValue CheckLo =
4251 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4252 SDValue HiIsZero =
4253 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4254 SDValue AdjustedLo =
4255 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4256 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4257 SDValue Result =
4258 DAG.getSelect(dl, VTy, CheckLo,
4259 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4260 return Result;
4261 }
4262 case Intrinsic::eh_sjlj_lsda: {
4265 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4266 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4267 SDValue CPAddr;
4268 bool IsPositionIndependent = isPositionIndependent();
4269 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4271 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4272 ARMCP::CPLSDA, PCAdj);
4273 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4274 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4275 SDValue Result = DAG.getLoad(
4276 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4278
4279 if (IsPositionIndependent) {
4280 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4281 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4282 }
4283 return Result;
4284 }
4285 case Intrinsic::arm_neon_vabs:
4286 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4287 Op.getOperand(1));
4288 case Intrinsic::arm_neon_vabds:
4289 if (Op.getValueType().isInteger())
4290 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4291 Op.getOperand(1), Op.getOperand(2));
4292 return SDValue();
4293 case Intrinsic::arm_neon_vabdu:
4294 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4295 Op.getOperand(1), Op.getOperand(2));
4296 case Intrinsic::arm_neon_vmulls:
4297 case Intrinsic::arm_neon_vmullu: {
4298 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4300 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4301 Op.getOperand(1), Op.getOperand(2));
4302 }
4303 case Intrinsic::arm_neon_vminnm:
4304 case Intrinsic::arm_neon_vmaxnm: {
4305 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4307 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4308 Op.getOperand(1), Op.getOperand(2));
4309 }
4310 case Intrinsic::arm_neon_vminu:
4311 case Intrinsic::arm_neon_vmaxu: {
4312 if (Op.getValueType().isFloatingPoint())
4313 return SDValue();
4314 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4315 ? ISD::UMIN : ISD::UMAX;
4316 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4317 Op.getOperand(1), Op.getOperand(2));
4318 }
4319 case Intrinsic::arm_neon_vmins:
4320 case Intrinsic::arm_neon_vmaxs: {
4321 // v{min,max}s is overloaded between signed integers and floats.
4322 if (!Op.getValueType().isFloatingPoint()) {
4323 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4324 ? ISD::SMIN : ISD::SMAX;
4325 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4326 Op.getOperand(1), Op.getOperand(2));
4327 }
4328 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4330 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4331 Op.getOperand(1), Op.getOperand(2));
4332 }
4333 case Intrinsic::arm_neon_vtbl1:
4334 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4335 Op.getOperand(1), Op.getOperand(2));
4336 case Intrinsic::arm_neon_vtbl2:
4337 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4338 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4339 case Intrinsic::arm_mve_pred_i2v:
4340 case Intrinsic::arm_mve_pred_v2i:
4341 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4342 Op.getOperand(1));
4343 case Intrinsic::arm_mve_vreinterpretq:
4344 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4345 Op.getOperand(1));
4346 case Intrinsic::arm_mve_lsll:
4347 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4348 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4349 case Intrinsic::arm_mve_asrl:
4350 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4351 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4352 }
4353}
4354
4356 const ARMSubtarget *Subtarget) {
4357 SDLoc dl(Op);
4358 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4359 if (SSID == SyncScope::SingleThread)
4360 return Op;
4361
4362 if (!Subtarget->hasDataBarrier()) {
4363 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4364 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4365 // here.
4366 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4367 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4368 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4369 DAG.getConstant(0, dl, MVT::i32));
4370 }
4371
4372 AtomicOrdering Ord =
4373 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4375 if (Subtarget->isMClass()) {
4376 // Only a full system barrier exists in the M-class architectures.
4378 } else if (Subtarget->preferISHSTBarriers() &&
4379 Ord == AtomicOrdering::Release) {
4380 // Swift happens to implement ISHST barriers in a way that's compatible with
4381 // Release semantics but weaker than ISH so we'd be fools not to use
4382 // it. Beware: other processors probably don't!
4384 }
4385
4386 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4387 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4388 DAG.getConstant(Domain, dl, MVT::i32));
4389}
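// Illustrative summary (not part of the upstream file) of the barrier domain
// chosen above, assuming the default set on the preceding (elided) line is the
// inner-shareable domain:
//   M-class profile                -> dmb sy    (only the full-system barrier exists)
//   Swift-like core, Release fence -> dmb ishst (store-store suffices for Release)
//   otherwise                      -> dmb ish   (inner-shareable, all accesses)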
4390
4392 const ARMSubtarget *Subtarget) {
4393 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4394 if (!(Subtarget->isThumb2() ||
4395 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4396 // Just preserve the chain.
4397 return Op.getOperand(0);
4398
4399 SDLoc dl(Op);
4400 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4401 if (!isRead &&
4402 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4403 // ARMv7 with MP extension has PLDW.
4404 return Op.getOperand(0);
4405
4406 unsigned isData = Op.getConstantOperandVal(4);
4407 if (Subtarget->isThumb()) {
4408 // Invert the bits.
4409 isRead = ~isRead & 1;
4410 isData = ~isData & 1;
4411 }
4412
4413 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4414 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4415 DAG.getConstant(isData, dl, MVT::i32));
4416}
4417
4420 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4421
4422 // vastart just stores the address of the VarArgsFrameIndex slot into the
4423 // memory location argument.
4424 SDLoc dl(Op);
4426 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4427 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4428 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4429 MachinePointerInfo(SV));
4430}
4431
4432SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4433 CCValAssign &NextVA,
4434 SDValue &Root,
4435 SelectionDAG &DAG,
4436 const SDLoc &dl) const {
4439
4440 const TargetRegisterClass *RC;
4441 if (AFI->isThumb1OnlyFunction())
4442 RC = &ARM::tGPRRegClass;
4443 else
4444 RC = &ARM::GPRRegClass;
4445
4446 // Transform the arguments stored in physical registers into virtual ones.
4447 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4448 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4449
4450 SDValue ArgValue2;
4451 if (NextVA.isMemLoc()) {
4452 MachineFrameInfo &MFI = MF.getFrameInfo();
4453 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4454
4455 // Create load node to retrieve arguments from the stack.
4456 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4457 ArgValue2 = DAG.getLoad(
4458 MVT::i32, dl, Root, FIN,
4460 } else {
4461 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4462 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4463 }
4464 if (!Subtarget->isLittle())
4465 std::swap (ArgValue, ArgValue2);
4466 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4467}
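// Illustrative sketch (not part of the upstream file): what the VMOVDRR above
// produces, as host code. The helper name is made up, and std::memcpy assumes
// <cstring>; the point is that the two 32-bit halves are paired bit-for-bit,
// with the swap applied first on big-endian targets exactly as done above.
static inline double f64FromRegPairSketch(uint32_t First, uint32_t Second,
                                          bool IsLittleEndian) {
  if (!IsLittleEndian)
    std::swap(First, Second);
  // VMOVDRR Dd, Rm, Rn: the first source register becomes the low 32 bits.
  uint64_t Bits = (uint64_t(Second) << 32) | First;
  double D;
  std::memcpy(&D, &Bits, sizeof D); // bit reinterpretation, no value conversion
  return D;
}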
4468
4469// The remaining GPRs hold either the beginning of variable-argument
4470// data, or the beginning of an aggregate passed by value (usually
4471// byval). Either way, we allocate stack slots adjacent to the data
4472// provided by our caller, and store the unallocated registers there.
4473// If this is a variadic function, the va_list pointer will begin with
4474// these values; otherwise, this reassembles a (byval) structure that
4475// was split between registers and memory.
4476// Return: The frame index the registers were stored into.
4477int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4478 const SDLoc &dl, SDValue &Chain,
4479 const Value *OrigArg,
4480 unsigned InRegsParamRecordIdx,
4481 int ArgOffset, unsigned ArgSize) const {
4482 // Currently, two use-cases are possible:
4483 // Case #1. Non-var-args function, and we meet the first byval parameter.
4484 // Set up the first unallocated register as the first byval register;
4485 // eat all remaining registers
4486 // (these two actions are performed by the HandleByVal method).
4487 // Then, here, we initialize the stack frame with
4488 // "store-reg" instructions.
4489 // Case #2. Var-args function that doesn't contain byval parameters.
4490 // The same: eat all remaining unallocated registers and
4491 // initialize the stack frame.
4492
4494 MachineFrameInfo &MFI = MF.getFrameInfo();
4496 unsigned RBegin, REnd;
4497 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4498 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4499 } else {
4500 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4501 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4502 REnd = ARM::R4;
4503 }
4504
4505 if (REnd != RBegin)
4506 ArgOffset = -4 * (ARM::R4 - RBegin);
4507
4508 auto PtrVT = getPointerTy(DAG.getDataLayout());
4509 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4510 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4511
4513 const TargetRegisterClass *RC =
4514 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4515
4516 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4517 Register VReg = MF.addLiveIn(Reg, RC);
4518 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4519 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4520 MachinePointerInfo(OrigArg, 4 * i));
4521 MemOps.push_back(Store);
4522 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4523 }
4524
4525 if (!MemOps.empty())
4526 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4527 return FrameIndex;
4528}
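// Worked example (illustrative, not part of the upstream file): if RBegin is
// r2, the code above picks ArgOffset = -4 * (r4 - r2) = -8, so r2 and r3 are
// stored at offsets -8 and -4 and the spilled registers sit immediately below
// the first stack-passed argument at offset 0, making the register and stack
// halves of a split byval/varargs area contiguous.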
4529
4530// Set up the stack frame that the va_list pointer will start from.
4531void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4532 const SDLoc &dl, SDValue &Chain,
4533 unsigned ArgOffset,
4534 unsigned TotalArgRegsSaveSize,
4535 bool ForceMutable) const {
4538
4539 // Try to store any remaining integer argument regs
4540 // to their spots on the stack so that they may be loaded by dereferencing
4541 // the result of va_next.
4542 // If there are no regs to be stored, just point the address after the last
4543 // argument passed via stack.
4544 int FrameIndex = StoreByValRegs(
4545 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4546 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4547 AFI->setVarArgsFrameIndex(FrameIndex);
4548}
4549
4550bool ARMTargetLowering::splitValueIntoRegisterParts(
4551 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4552 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4553 EVT ValueVT = Val.getValueType();
4554 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4555 unsigned ValueBits = ValueVT.getSizeInBits();
4556 unsigned PartBits = PartVT.getSizeInBits();
4557 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4558 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4559 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4560 Parts[0] = Val;
4561 return true;
4562 }
4563 return false;
4564}
4565
4566SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4567 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4568 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4569 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4570 unsigned ValueBits = ValueVT.getSizeInBits();
4571 unsigned PartBits = PartVT.getSizeInBits();
4572 SDValue Val = Parts[0];
4573
4574 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4575 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4576 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4577 return Val;
4578 }
4579 return SDValue();
4580}
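// Illustrative sketch (not part of the upstream file): the f16/bf16 <-> f32
// "part" handling above never converts the value, it only parks the 16 bits of
// the half in the low half of a 32-bit container (the helper names are made
// up):
static inline uint32_t f16PartToF32ContainerSketch(uint16_t HalfBits) {
  return HalfBits;                         // bitcast + any_extend in the DAG
}
static inline uint16_t f32ContainerToF16PartSketch(uint32_t Container) {
  return static_cast<uint16_t>(Container); // truncate + bitcast in the DAG
}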
4581
4582SDValue ARMTargetLowering::LowerFormalArguments(
4583 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4584 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4585 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4587 MachineFrameInfo &MFI = MF.getFrameInfo();
4588
4590
4591 // Assign locations to all of the incoming arguments.
4593 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4594 *DAG.getContext());
4595 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4596
4598 unsigned CurArgIdx = 0;
4599
4600 // Initially ArgRegsSaveSize is zero.
4601 // Then we increase this value each time we meet byval parameter.
4602 // We also increase this value in case of varargs function.
4603 AFI->setArgRegsSaveSize(0);
4604
4605 // Calculate the amount of stack space that we need to allocate to store
4606 // byval and variadic arguments that are passed in registers.
4607 // We need to know this before we allocate the first byval or variadic
4608 // argument, as they will be allocated a stack slot below the CFA (Canonical
4609 // Frame Address, the stack pointer at entry to the function).
4610 unsigned ArgRegBegin = ARM::R4;
4611 for (const CCValAssign &VA : ArgLocs) {
4612 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4613 break;
4614
4615 unsigned Index = VA.getValNo();
4616 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4617 if (!Flags.isByVal())
4618 continue;
4619
4620 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4621 unsigned RBegin, REnd;
4622 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4623 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4624
4625 CCInfo.nextInRegsParam();
4626 }
4627 CCInfo.rewindByValRegsInfo();
4628
4629 int lastInsIndex = -1;
4630 if (isVarArg && MFI.hasVAStart()) {
4631 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4632 if (RegIdx != std::size(GPRArgRegs))
4633 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4634 }
4635
4636 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4637 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4638 auto PtrVT = getPointerTy(DAG.getDataLayout());
4639
4640 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4641 CCValAssign &VA = ArgLocs[i];
4642 if (Ins[VA.getValNo()].isOrigArg()) {
4643 std::advance(CurOrigArg,
4644 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4645 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4646 }
4647 // Arguments stored in registers.
4648 if (VA.isRegLoc()) {
4649 EVT RegVT = VA.getLocVT();
4650 SDValue ArgValue;
4651
4652 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4653 // f64 and vector types are split up into multiple registers or
4654 // combinations of registers and stack slots.
4655 SDValue ArgValue1 =
4656 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4657 VA = ArgLocs[++i]; // skip ahead to next loc
4658 SDValue ArgValue2;
4659 if (VA.isMemLoc()) {
4660 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4661 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4662 ArgValue2 = DAG.getLoad(
4663 MVT::f64, dl, Chain, FIN,
4665 } else {
4666 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4667 }
4668 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4669 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4670 ArgValue1, DAG.getIntPtrConstant(0, dl));
4671 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4672 ArgValue2, DAG.getIntPtrConstant(1, dl));
4673 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4674 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4675 } else {
4676 const TargetRegisterClass *RC;
4677
4678 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4679 RC = &ARM::HPRRegClass;
4680 else if (RegVT == MVT::f32)
4681 RC = &ARM::SPRRegClass;
4682 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4683 RegVT == MVT::v4bf16)
4684 RC = &ARM::DPRRegClass;
4685 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4686 RegVT == MVT::v8bf16)
4687 RC = &ARM::QPRRegClass;
4688 else if (RegVT == MVT::i32)
4689 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4690 : &ARM::GPRRegClass;
4691 else
4692 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4693
4694 // Transform the arguments in physical registers into virtual ones.
4695 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4696 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4697
4698 // If this value is passed in r0 and has the returned attribute (e.g.
4699 // C++ 'structors), record this fact for later use.
4700 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4701 AFI->setPreservesR0();
4702 }
4703 }
4704
4705 // If this is an 8 or 16-bit value, it is really passed promoted
4706 // to 32 bits. Insert an assert[sz]ext to capture this, then
4707 // truncate to the right size.
4708 switch (VA.getLocInfo()) {
4709 default: llvm_unreachable("Unknown loc info!");
4710 case CCValAssign::Full: break;
4711 case CCValAssign::BCvt:
4712 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4713 break;
4714 }
4715
4716 // f16 arguments have their size extended to 4 bytes and passed as if they
4717 // had been copied to the LSBs of a 32-bit register.
4718 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4719 if (VA.needsCustom() &&
4720 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4721 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4722
4723 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4724 // less than 32 bits must be sign- or zero-extended in the callee for
4725 // security reasons. Although the ABI mandates an extension done by the
4726 // caller, the latter cannot be trusted to follow the rules of the ABI.
4727 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4728 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4729 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4730 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4731
4732 InVals.push_back(ArgValue);
4733 } else { // VA.isRegLoc()
4734 // Only arguments passed on the stack should make it here.
4735 assert(VA.isMemLoc());
4736 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4737
4738 int index = VA.getValNo();
4739
4740 // Some Ins[] entries become multiple ArgLoc[] entries.
4741 // Process them only once.
4742 if (index != lastInsIndex)
4743 {
4744 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4745 // FIXME: For now, all byval parameter objects are marked mutable.
4746 // This can be changed with more analysis.
4747 // In case of tail call optimization, mark all arguments mutable,
4748 // since they could be overwritten by lowering of arguments in case of
4749 // a tail call.
4750 if (Flags.isByVal()) {
4751 assert(Ins[index].isOrigArg() &&
4752 "Byval arguments cannot be implicit");
4753 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4754
4755 int FrameIndex = StoreByValRegs(
4756 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4757 VA.getLocMemOffset(), Flags.getByValSize());
4758 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4759 CCInfo.nextInRegsParam();
4760 } else {
4761 unsigned FIOffset = VA.getLocMemOffset();
4762 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4763 FIOffset, true);
4764
4765 // Create load nodes to retrieve arguments from the stack.
4766 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4767 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4769 DAG.getMachineFunction(), FI)));
4770 }
4771 lastInsIndex = index;
4772 }
4773 }
4774 }
4775
4776 // varargs
4777 if (isVarArg && MFI.hasVAStart()) {
4778 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4779 TotalArgRegsSaveSize);
4780 if (AFI->isCmseNSEntryFunction()) {
4783 "secure entry function must not be variadic", dl.getDebugLoc());
4784 DAG.getContext()->diagnose(Diag);
4785 }
4786 }
4787
4788 unsigned StackArgSize = CCInfo.getStackSize();
4789 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4790 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4791 // The only way to guarantee a tail call is if the callee restores its
4792 // argument area, but it must also keep the stack aligned when doing so.
4793 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
4794 assert(StackAlign && "data layout string is missing stack alignment");
4795 StackArgSize = alignTo(StackArgSize, *StackAlign);
4796
4797 AFI->setArgumentStackToRestore(StackArgSize);
4798 }
4799 AFI->setArgumentStackSize(StackArgSize);
4800
4801 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4804 "secure entry function requires arguments on stack", dl.getDebugLoc());
4805 DAG.getContext()->diagnose(Diag);
4806 }
4807
4808 return Chain;
4809}
4810
4811/// isFloatingPointZero - Return true if this is +0.0.
4813 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4814 return CFP->getValueAPF().isPosZero();
4815 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4816 // Maybe this has already been legalized into the constant pool?
4817 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4818 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4819 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4820 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4821 return CFP->getValueAPF().isPosZero();
4822 }
4823 } else if (Op->getOpcode() == ISD::BITCAST &&
4824 Op->getValueType(0) == MVT::f64) {
4825 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4826 // created by LowerConstantFP().
4827 SDValue BitcastOp = Op->getOperand(0);
4828 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4829 isNullConstant(BitcastOp->getOperand(0)))
4830 return true;
4831 }
4832 return false;
4833}
4834
4835/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4836/// the given operands.
4837SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4838 SDValue &ARMcc, SelectionDAG &DAG,
4839 const SDLoc &dl) const {
4840 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4841 unsigned C = RHSC->getZExtValue();
4842 if (!isLegalICmpImmediate((int32_t)C)) {
4843 // Constant does not fit, try adjusting it by one.
4844 switch (CC) {
4845 default: break;
4846 case ISD::SETLT:
4847 case ISD::SETGE:
4848 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4850 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4851 }
4852 break;
4853 case ISD::SETULT:
4854 case ISD::SETUGE:
4855 if (C != 0 && isLegalICmpImmediate(C-1)) {
4857 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4858 }
4859 break;
4860 case ISD::SETLE:
4861 case ISD::SETGT:
4862 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4864 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4865 }
4866 break;
4867 case ISD::SETULE:
4868 case ISD::SETUGT:
4869 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4871 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4872 }
4873 break;
4874 }
4875 }
4876 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4878 // In ARM and Thumb-2, the compare instructions can shift their second
4879 // operand.
4881 std::swap(LHS, RHS);
4882 }
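  // The adjustments above rely on these equivalences (illustrative summary,
  // not part of the upstream file); each one is applied only when the guard on
  // the original constant rules out wrap-around:
  //   x <  C  ->  x <= C-1      x >= C  ->  x >  C-1     (signed,   C != INT_MIN)
  //   x <u C  ->  x <=u C-1     x >=u C ->  x >u C-1     (unsigned, C != 0)
  //   x <= C  ->  x <  C+1      x >  C  ->  x >= C+1     (signed,   C != INT_MAX)
  //   x <=u C ->  x <u C+1      x >u C  ->  x >=u C+1    (unsigned, C != UINT_MAX)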
4883
4884 // Thumb1 has very limited immediate modes, so turning an "and" into a
4885 // shift can save multiple instructions.
4886 //
4887 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4888 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4889 // own. If it's the operand to an unsigned comparison with an immediate,
4890 // we can eliminate one of the shifts: we transform
4891 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4892 //
4893 // We avoid transforming cases which aren't profitable due to encoding
4894 // details:
4895 //
4896 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4897 // would not; in that case, we're essentially trading one immediate load for
4898 // another.
4899 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4900 // 3. C2 is zero; we have other code for this special case.
4901 //
4902 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4903 // instruction, since the AND is always one instruction anyway, but we could
4904 // use narrow instructions in some cases.
4905 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4906 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4907 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4908 !isSignedIntSetCC(CC)) {
4909 unsigned Mask = LHS.getConstantOperandVal(1);
4910 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4911 uint64_t RHSV = RHSC->getZExtValue();
4912 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4913 unsigned ShiftBits = llvm::countl_zero(Mask);
4914 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4915 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4916 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4917 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4918 }
4919 }
4920 }
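  // Worked example (illustrative, not part of the upstream file): on Thumb1,
  // "(x & 0x3ff) == 0x300" is rewritten here to "(x << 22) == (0x300 << 22)",
  // which drops the logical-shift-right that the "((x << 22) >> 22) == 0x300"
  // form would otherwise need.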
4921
4922 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4923 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4924 // way a cmp would.
4925 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4926 // some tweaks to the heuristics for the previous and->shift transform.
4927 // FIXME: Optimize cases where the LHS isn't a shift.
4928 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4929 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4930 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4931 LHS.getConstantOperandVal(1) < 31) {
4932 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4933 SDValue Shift =
4934 DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
4935 LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
4936 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4937 return Shift.getValue(1);
4938 }
4939
4941
4942 // If the RHS is a constant zero then the V (overflow) flag will never be
4943 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4944 // simpler for other passes (like the peephole optimiser) to deal with.
4945 if (isNullConstant(RHS)) {
4946 switch (CondCode) {
4947 default: break;
4948 case ARMCC::GE:
4950 break;
4951 case ARMCC::LT:
4953 break;
4954 }
4955 }
4956
4957 ARMISD::NodeType CompareType;
4958 switch (CondCode) {
4959 default:
4960 CompareType = ARMISD::CMP;
4961 break;
4962 case ARMCC::EQ:
4963 case ARMCC::NE:
4964 // Uses only Z Flag
4965 CompareType = ARMISD::CMPZ;
4966 break;
4967 }
4968 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4969 return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
4970}
4971
4972/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4973SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4974 SelectionDAG &DAG, const SDLoc &dl,
4975 bool Signaling) const {
4976 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4977 SDValue Flags;
4978 if (!isFloatingPointZero(RHS))
4979 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
4980 LHS, RHS);
4981 else
4982 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4983 FlagsVT, LHS);
4984 return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
4985}
4986
4987// This function returns three things: the arithmetic computation itself
4988// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4989// comparison and the condition code define the case in which the arithmetic
4990// computation *does not* overflow.
4991std::pair<SDValue, SDValue>
4992ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4993 SDValue &ARMcc) const {
4994 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4995
4996 SDValue Value, OverflowCmp;
4997 SDValue LHS = Op.getOperand(0);
4998 SDValue RHS = Op.getOperand(1);
4999 SDLoc dl(Op);
5000
5001 // FIXME: We are currently always generating CMPs because we don't support
5002 // generating CMN through the backend. This is not as good as the natural
5003 // CMP case because it causes a register dependency and cannot be folded
5004 // later.
5005
5006 switch (Op.getOpcode()) {
5007 default:
5008 llvm_unreachable("Unknown overflow instruction!");
5009 case ISD::SADDO:
5010 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
5011 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
5012 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
5013 break;
5014 case ISD::UADDO:
5015 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
5016 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
5017 // We do not use it in the USUBO case as Value may not be used.
5018 Value = DAG.getNode(ARMISD::ADDC, dl,
5019 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
5020 .getValue(0);
5021 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
5022 break;
5023 case ISD::SSUBO:
5024 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
5025 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
5026 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
5027 break;
5028 case ISD::USUBO:
5029 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
5030 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
5031 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
5032 break;
5033 case ISD::UMULO:
5034 // We generate a UMUL_LOHI and then check if the high word is 0.
5035 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
5036 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
5037 DAG.getVTList(Op.getValueType(), Op.getValueType()),
5038 LHS, RHS);
5039 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
5040 DAG.getConstant(0, dl, MVT::i32));
5041 Value = Value.getValue(0); // We only want the low 32 bits for the result.
5042 break;
5043 case ISD::SMULO:
5044 // We generate a SMUL_LOHI and then check if all the bits of the high word
5045 // are the same as the sign bit of the low word.
5046 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
5047 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
5048 DAG.getVTList(Op.getValueType(), Op.getValueType()),
5049 LHS, RHS);
5050 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
5051 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
5052 Value.getValue(0),
5053 DAG.getConstant(31, dl, MVT::i32)));
5054 Value = Value.getValue(0); // We only want the low 32 bits for the result.
5055 break;
5056 } // switch (...)
5057
5058 return std::make_pair(Value, OverflowCmp);
5059}
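// Note on the SADDO/UADDO encoding above (illustrative, not part of the
// upstream file): "cmp (a+b), a" recomputes b by subtraction, so its V
// (respectively C) flag reflects exactly whether the original addition
// overflowed. E.g. for a = INT32_MAX, b = 1 the sum wraps to INT32_MIN and
// INT32_MIN - INT32_MAX overflows again, setting V; hence "no overflow" is VC
// for the signed case and HS (carry set, no borrow) for the unsigned case.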
5060
5061SDValue
5062ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
5063 // Let legalize expand this if it isn't a legal type yet.
5064 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5065 return SDValue();
5066
5067 SDValue Value, OverflowCmp;
5068 SDValue ARMcc;
5069 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
5070 SDLoc dl(Op);
5071 // We use 0 and 1 as false and true values.
5072 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
5073 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
5074 EVT VT = Op.getValueType();
5075
5076 SDValue Overflow =
5077 DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
5078
5079 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
5080 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5081}
5082
5084 SelectionDAG &DAG) {
5085 SDLoc DL(BoolCarry);
5086 EVT CarryVT = BoolCarry.getValueType();
5087
5088 // This converts the boolean value carry into the carry flag by doing
5089 // ARMISD::SUBC Carry, 1
5090 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5091 DAG.getVTList(CarryVT, MVT::i32),
5092 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5093 return Carry.getValue(1);
5094}
5095
5097 SelectionDAG &DAG) {
5098 SDLoc DL(Flags);
5099
5100 // Now convert the carry flag into a boolean carry. We do this
5101 // using ARMISD::ADDE 0, 0, Carry
5102 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5103 DAG.getConstant(0, DL, MVT::i32),
5104 DAG.getConstant(0, DL, MVT::i32), Flags);
5105}
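// Illustrative check of the two conversions above (not part of the upstream
// file): "subs carry, #1" leaves C equal to the boolean carry (1 - 1 = 0 with
// no borrow sets C; 0 - 1 borrows and clears C), and "adc 0, 0" materializes
// the flag again as 0 + 0 + C == C.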
5106
5107SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5108 SelectionDAG &DAG) const {
5109 // Let legalize expand this if it isn't a legal type yet.
5110 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5111 return SDValue();
5112
5113 SDValue LHS = Op.getOperand(0);
5114 SDValue RHS = Op.getOperand(1);
5115 SDLoc dl(Op);
5116
5117 EVT VT = Op.getValueType();
5118 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5119 SDValue Value;
5120 SDValue Overflow;
5121 switch (Op.getOpcode()) {
5122 default:
5123 llvm_unreachable("Unknown overflow instruction!");
5124 case ISD::UADDO:
5125 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5126 // Convert the carry flag into a boolean value.
5127 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5128 break;
5129 case ISD::USUBO: {
5130 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5131 // Convert the carry flag into a boolean value.
5132 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5133 // ARMISD::SUBC returns 0 when we have to borrow, so turn it into an
5134 // overflow value by computing 1 - C.
5135 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5136 DAG.getConstant(1, dl, MVT::i32), Overflow);
5137 break;
5138 }
5139 }
5140
5141 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5142}
5143
5145 const ARMSubtarget *Subtarget) {
5146 EVT VT = Op.getValueType();
5147 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5148 return SDValue();
5149 if (!VT.isSimple())
5150 return SDValue();
5151
5152 unsigned NewOpcode;
5153 switch (VT.getSimpleVT().SimpleTy) {
5154 default:
5155 return SDValue();
5156 case MVT::i8:
5157 switch (Op->getOpcode()) {
5158 case ISD::UADDSAT:
5159 NewOpcode = ARMISD::UQADD8b;
5160 break;
5161 case ISD::SADDSAT:
5162 NewOpcode = ARMISD::QADD8b;
5163 break;
5164 case ISD::USUBSAT:
5165 NewOpcode = ARMISD::UQSUB8b;
5166 break;
5167 case ISD::SSUBSAT:
5168 NewOpcode = ARMISD::QSUB8b;
5169 break;
5170 }
5171 break;
5172 case MVT::i16:
5173 switch (Op->getOpcode()) {
5174 case ISD::UADDSAT:
5175 NewOpcode = ARMISD::UQADD16b;
5176 break;
5177 case ISD::SADDSAT:
5178 NewOpcode = ARMISD::QADD16b;
5179 break;
5180 case ISD::USUBSAT:
5181 NewOpcode = ARMISD::UQSUB16b;
5182 break;
5183 case ISD::SSUBSAT:
5184 NewOpcode = ARMISD::QSUB16b;
5185 break;
5186 }
5187 break;
5188 }
5189
5190 SDLoc dl(Op);
5191 SDValue Add =
5192 DAG.getNode(NewOpcode, dl, MVT::i32,
5193 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5194 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5195 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5196}
5197
5198SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5199 SDValue Cond = Op.getOperand(0);
5200 SDValue SelectTrue = Op.getOperand(1);
5201 SDValue SelectFalse = Op.getOperand(2);
5202 SDLoc dl(Op);
5203 unsigned Opc = Cond.getOpcode();
5204
5205 if (Cond.getResNo() == 1 &&
5206 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5207 Opc == ISD::USUBO)) {
5208 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5209 return SDValue();
5210
5211 SDValue Value, OverflowCmp;
5212 SDValue ARMcc;
5213 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5214 EVT VT = Op.getValueType();
5215
5216 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
5217 }
5218
5219 // Convert:
5220 //
5221 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5222 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5223 //
5224 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5225 const ConstantSDNode *CMOVTrue =
5226 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5227 const ConstantSDNode *CMOVFalse =
5228 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5229
5230 if (CMOVTrue && CMOVFalse) {
5231 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5232 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5233
5234 SDValue True;
5235 SDValue False;
5236 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5237 True = SelectTrue;
5238 False = SelectFalse;
5239 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5240 True = SelectFalse;
5241 False = SelectTrue;
5242 }
5243
5244 if (True.getNode() && False.getNode())
5245 return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
5246 Cond.getOperand(3), DAG);
5247 }
5248 }
5249
5250 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5251 // undefined bits before doing a full-word comparison with zero.
5252 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5253 DAG.getConstant(1, dl, Cond.getValueType()));
5254
5255 return DAG.getSelectCC(dl, Cond,
5256 DAG.getConstant(0, dl, Cond.getValueType()),
5257 SelectTrue, SelectFalse, ISD::SETNE);
5258}
5259
5261 bool &swpCmpOps, bool &swpVselOps) {
5262 // Start by selecting the GE condition code for opcodes that return true for
5263 // 'equality'
5264 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5265 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5266 CondCode = ARMCC::GE;
5267
5268 // and GT for opcodes that return false for 'equality'.
5269 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5270 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5271 CondCode = ARMCC::GT;
5272
5273 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5274 // to swap the compare operands.
5275 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5276 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5277 swpCmpOps = true;
5278
5279 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5280 // If we have an unordered opcode, we need to swap the operands to the VSEL
5281 // instruction (effectively negating the condition).
5282 //
5283 // This also has the effect of swapping which one of 'less' or 'greater'
5284 // returns true, so we also swap the compare operands. It also switches
5285 // whether we return true for 'equality', so we compensate by picking the
5286 // opposite condition code to our original choice.
5287 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5288 CC == ISD::SETUGT) {
5289 swpCmpOps = !swpCmpOps;
5290 swpVselOps = !swpVselOps;
5291 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5292 }
5293
5294 // 'ordered' is 'anything but unordered', so use the VS condition code and
5295 // swap the VSEL operands.
5296 if (CC == ISD::SETO) {
5297 CondCode = ARMCC::VS;
5298 swpVselOps = true;
5299 }
5300
5301 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5302 // code and swap the VSEL operands. Also do this if we don't care about the
5303 // unordered case.
5304 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5305 CondCode = ARMCC::EQ;
5306 swpVselOps = true;
5307 }
5308}
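// Worked example (illustrative, not part of the upstream file): for
// CC == SETULT the code above first picks GT, the 'less' rule swaps the
// compare operands, and the 'unordered' rule swaps them back, flips the VSEL
// operands and relaxes GT to GE. Net effect:
//   a <u b ? t : f   ==>   compare (a, b), VSEL on GE with t and f exchanged,
// which is correct because "unordered or less-than" is exactly the negation of
// the ordered "greater-or-equal".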
5309
5310SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5311 SDValue TrueVal, SDValue ARMcc,
5312 SDValue Flags, SelectionDAG &DAG) const {
5313 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5314 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5315 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5316 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5317 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5318
5319 SDValue TrueLow = TrueVal.getValue(0);
5320 SDValue TrueHigh = TrueVal.getValue(1);
5321 SDValue FalseLow = FalseVal.getValue(0);
5322 SDValue FalseHigh = FalseVal.getValue(1);
5323
5324 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5325 ARMcc, Flags);
5326 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5327 ARMcc, Flags);
5328
5329 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5330 }
5331 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
5332}
5333
5334static bool isGTorGE(ISD::CondCode CC) {
5335 return CC == ISD::SETGT || CC == ISD::SETGE;
5336}
5337
5338static bool isLTorLE(ISD::CondCode CC) {
5339 return CC == ISD::SETLT || CC == ISD::SETLE;
5340}
5341
5342// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5343// All of these conditions (and their <= and >= counterparts) will do:
5344// x < k ? k : x
5345// x > k ? x : k
5346// k < x ? x : k
5347// k > x ? k : x
5348static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5349 const SDValue TrueVal, const SDValue FalseVal,
5350 const ISD::CondCode CC, const SDValue K) {
5351 return (isGTorGE(CC) &&
5352 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5353 (isLTorLE(CC) &&
5354 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5355}
5356
5357// Check if two chained conditionals could be converted into SSAT or USAT.
5358//
5359// SSAT can replace a set of two conditional selectors that bound a number to an
5360// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5361//
5362// x < -k ? -k : (x > k ? k : x)
5363// x < -k ? -k : (x < k ? x : k)
5364// x > -k ? (x > k ? k : x) : -k
5365// x < k ? (x < -k ? -k : x) : k
5366// etc.
5367//
5368// LLVM canonicalizes these to either a min(max()) or a max(min())
5369// pattern. This function tries to match one of these and will return a SSAT
5370// node if successful.
5371//
5372// USAT works similarly to SSAT, but bounds to the interval [0, k], where k + 1
5373// is a power of 2.
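//
// For example, clamping x to [-128, 127] (k == 127, -k - 1 == -128), once in
// the canonical min/max form checked below, is emitted as an ARMISD::SSAT node
// whose bit-position operand is countr_one(127) == 7; clamping to [0, 255]
// becomes ARMISD::USAT with countr_one(255) == 8.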
5374static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5375 EVT VT = Op.getValueType();
5376 SDValue V1 = Op.getOperand(0);
5377 SDValue K1 = Op.getOperand(1);
5378 SDValue TrueVal1 = Op.getOperand(2);
5379 SDValue FalseVal1 = Op.getOperand(3);
5380 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5381
5382 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5383 if (Op2.getOpcode() != ISD::SELECT_CC)
5384 return SDValue();
5385
5386 SDValue V2 = Op2.getOperand(0);
5387 SDValue K2 = Op2.getOperand(1);
5388 SDValue TrueVal2 = Op2.getOperand(2);
5389 SDValue FalseVal2 = Op2.getOperand(3);
5390 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5391
5392 SDValue V1Tmp = V1;
5393 SDValue V2Tmp = V2;
5394
5395 // Check that the registers and the constants match a max(min()) or min(max())
5396 // pattern
5397 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5398 K2 != FalseVal2 ||
5399 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5400 return SDValue();
5401
5402 // Check that the constant in the lower-bound check is
5403 // the opposite of the constant in the upper-bound check
5404 // in 1's complement.
5405 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5406 return SDValue();
5407
5408 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5409 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5410 int64_t PosVal = std::max(Val1, Val2);
5411 int64_t NegVal = std::min(Val1, Val2);
5412
5413 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5414 !isPowerOf2_64(PosVal + 1))
5415 return SDValue();
5416
5417 // Handle the difference between USAT (unsigned) and SSAT (signed)
5418 // saturation
5419 // At this point, PosVal is guaranteed to be positive
5420 uint64_t K = PosVal;
5421 SDLoc dl(Op);
5422 if (Val1 == ~Val2)
5423 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5424 DAG.getConstant(llvm::countr_one(K), dl, VT));
5425 if (NegVal == 0)
5426 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5427 DAG.getConstant(llvm::countr_one(K), dl, VT));
5428
5429 return SDValue();
5430}
5431
5432// Check if a condition of the type x < k ? k : x can be converted into a
5433// bit operation instead of conditional moves.
5434// Currently this is allowed given:
5435// - The conditions and values match up
5436// - k is 0 or -1 (all ones)
5437// This function will not check the last condition; that's up to the caller.
5438// It returns true if the transformation can be made, and in that case
5439// returns x in V, and k in SatK.
5440static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5441 SDValue &SatK)
5442{
5443 SDValue LHS = Op.getOperand(0);
5444 SDValue RHS = Op.getOperand(1);
5445 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5446 SDValue TrueVal = Op.getOperand(2);
5447 SDValue FalseVal = Op.getOperand(3);
5448
5449 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5450 ? &RHS
5451 : nullptr;
5452
5453 // No constant operand in the comparison, early out
5454 if (!K)
5455 return false;
5456
5457 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5458 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5459 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5460
5461 // If the constant in the comparison does not match the constant in the
5462 // select, or the variables do not match, early out
5463 if (*K != KTmp || V != VTmp)
5464 return false;
5465
5466 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5467 SatK = *K;
5468 return true;
5469 }
5470
5471 return false;
5472}
5473
5474bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5475 if (VT == MVT::f32)
5476 return !Subtarget->hasVFP2Base();
5477 if (VT == MVT::f64)
5478 return !Subtarget->hasFP64();
5479 if (VT == MVT::f16)
5480 return !Subtarget->hasFullFP16();
5481 return false;
5482}
5483
5484SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5485 EVT VT = Op.getValueType();
5486 SDLoc dl(Op);
5487
5488 // Try to convert two saturating conditional selects into a single SSAT
5489 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5490 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5491 return SatValue;
5492
5493 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5494 // into more efficient bit operations, which is possible when k is 0 or -1
5495 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5496 // single instructions. On Thumb the shift and the bit operation will be two
5497 // instructions.
5498 // Only allow this transformation on full-width (32-bit) operations
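// For example, "x < 0 ? 0 : x" (max(x, 0)) becomes x & ~(x >> 31), and
// "x < -1 ? -1 : x" (max(x, -1)) becomes x | (x >> 31), with an arithmetic
// shift in both cases.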
5499 SDValue LowerSatConstant;
5500 SDValue SatValue;
5501 if (VT == MVT::i32 &&
5502 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5503 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5504 DAG.getConstant(31, dl, VT));
5505 if (isNullConstant(LowerSatConstant)) {
5506 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5507 DAG.getAllOnesConstant(dl, VT));
5508 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5509 } else if (isAllOnesConstant(LowerSatConstant))
5510 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5511 }
5512
5513 SDValue LHS = Op.getOperand(0);
5514 SDValue RHS = Op.getOperand(1);
5515 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5516 SDValue TrueVal = Op.getOperand(2);
5517 SDValue FalseVal = Op.getOperand(3);
5518 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5519 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5520
5521 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5522 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5523 unsigned TVal = CTVal->getZExtValue();
5524 unsigned FVal = CFVal->getZExtValue();
5525 unsigned Opcode = 0;
5526
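// For example, (c ? -1 : 0) can use CSINV, (c ? 1 : -1) can use CSNEG, and
// (c ? 5 : 6) can use CSINC, because the false value can be recomputed from
// the true value.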
5527 if (TVal == ~FVal) {
5528 Opcode = ARMISD::CSINV;
5529 } else if (TVal == ~FVal + 1) {
5530 Opcode = ARMISD::CSNEG;
5531 } else if (TVal + 1 == FVal) {
5532 Opcode = ARMISD::CSINC;
5533 } else if (TVal == FVal + 1) {
5534 Opcode = ARMISD::CSINC;
5535 std::swap(TrueVal, FalseVal);
5536 std::swap(TVal, FVal);
5537 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5538 }
5539
5540 if (Opcode) {
5541 // If one of the constants is cheaper than another, materialise the
5542 // cheaper one and let the csel generate the other.
5543 if (Opcode != ARMISD::CSINC &&
5544 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5545 std::swap(TrueVal, FalseVal);
5546 std::swap(TVal, FVal);
5547 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5548 }
5549
5550 // Attempt to use ZR, checking whether TVal is 0, possibly inverting the
5551 // condition to get there. CSINC is not invertible like the other two
5552 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5553 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5554 std::swap(TrueVal, FalseVal);
5555 std::swap(TVal, FVal);
5556 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5557 }
5558
5559 // Drops F's value because we can get it by inverting/negating TVal.
5560 FalseVal = TrueVal;
5561
5562 SDValue ARMcc;
5563 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5564 EVT VT = TrueVal.getValueType();
5565 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5566 }
5567 }
5568
5569 if (isUnsupportedFloatingType(LHS.getValueType())) {
5570 DAG.getTargetLoweringInfo().softenSetCCOperands(
5571 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5572
5573 // If softenSetCCOperands only returned one value, we should compare it to
5574 // zero.
5575 if (!RHS.getNode()) {
5576 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5577 CC = ISD::SETNE;
5578 }
5579 }
5580
5581 if (LHS.getValueType() == MVT::i32) {
5582 // Try to generate VSEL on ARMv8.
5583 // The VSEL instruction can't use all the usual ARM condition
5584 // codes: it only has two bits to select the condition code, so it's
5585 // constrained to use only GE, GT, VS and EQ.
5586 //
5587 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5588 // swap the operands of the previous compare instruction (effectively
5589 // inverting the compare condition, swapping 'less' and 'greater') and
5590 // sometimes need to swap the operands to the VSEL (which inverts the
5591 // condition in the sense of firing whenever the previous condition didn't)
5592 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5593 TrueVal.getValueType() == MVT::f32 ||
5594 TrueVal.getValueType() == MVT::f64)) {
5595 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5596 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5597 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5598 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5599 std::swap(TrueVal, FalseVal);
5600 }
5601 }
5602
5603 SDValue ARMcc;
5604 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5605 // Choose GE over PL, which vsel does not support
5606 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5607 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5608 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5609 }
5610
5611 ARMCC::CondCodes CondCode, CondCode2;
5612 FPCCToARMCC(CC, CondCode, CondCode2);
5613
5614 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5615 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5616 // must use VSEL (limited condition codes), due to not having conditional f16
5617 // moves.
5618 if (Subtarget->hasFPARMv8Base() &&
5619 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5620 (TrueVal.getValueType() == MVT::f16 ||
5621 TrueVal.getValueType() == MVT::f32 ||
5622 TrueVal.getValueType() == MVT::f64)) {
5623 bool swpCmpOps = false;
5624 bool swpVselOps = false;
5625 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5626
5627 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5628 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5629 if (swpCmpOps)
5630 std::swap(LHS, RHS);
5631 if (swpVselOps)
5632 std::swap(TrueVal, FalseVal);
5633 }
5634 }
5635
5636 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5637 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5638 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5639 if (CondCode2 != ARMCC::AL) {
5640 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5641 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
5642 }
5643 return Result;
5644}
5645
5646/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5647/// to morph to an integer compare sequence.
5648static bool canChangeToInt(SDValue Op, bool &SeenZero,
5649 const ARMSubtarget *Subtarget) {
5650 SDNode *N = Op.getNode();
5651 if (!N->hasOneUse())
5652 // Otherwise it requires moving the value from fp to integer registers.
5653 return false;
5654 if (!N->getNumValues())
5655 return false;
5656 EVT VT = Op.getValueType();
5657 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5658 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5659 // vmrs are very slow, e.g. cortex-a8.
5660 return false;
5661
5662 if (isFloatingPointZero(Op)) {
5663 SeenZero = true;
5664 return true;
5665 }
5666 return ISD::isNormalLoad(N);
5667}
5668
5669static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5670 if (isFloatingPointZero(Op))
5671 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5672
5673 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5674 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5675 Ld->getPointerInfo(), Ld->getAlign(),
5676 Ld->getMemOperand()->getFlags());
5677
5678 llvm_unreachable("Unknown VFP cmp argument!");
5679}
5680
5681static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5682 SDValue &RetVal1, SDValue &RetVal2) {
5683 SDLoc dl(Op);
5684
5685 if (isFloatingPointZero(Op)) {
5686 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5687 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5688 return;
5689 }
5690
5691 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5692 SDValue Ptr = Ld->getBasePtr();
5693 RetVal1 =
5694 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5695 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5696
5697 EVT PtrType = Ptr.getValueType();
5698 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5699 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5700 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5701 Ld->getPointerInfo().getWithOffset(4),
5702 commonAlignment(Ld->getAlign(), 4),
5703 Ld->getMemOperand()->getFlags());
5704 return;
5705 }
5706
5707 llvm_unreachable("Unknown VFP cmp argument!");
5708}
5709
5710/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5711/// f32 and even f64 comparisons to integer ones.
5712SDValue
5713ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5714 SDValue Chain = Op.getOperand(0);
5715 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5716 SDValue LHS = Op.getOperand(2);
5717 SDValue RHS = Op.getOperand(3);
5718 SDValue Dest = Op.getOperand(4);
5719 SDLoc dl(Op);
5720
5721 bool LHSSeenZero = false;
5722 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5723 bool RHSSeenZero = false;
5724 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5725 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5726 // If unsafe fp math optimization is enabled and there are no other uses of
5727 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5728 // to an integer comparison.
5729 if (CC == ISD::SETOEQ)
5730 CC = ISD::SETEQ;
5731 else if (CC == ISD::SETUNE)
5732 CC = ISD::SETNE;
5733
5734 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5735 SDValue ARMcc;
5736 if (LHS.getValueType() == MVT::f32) {
5737 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5738 bitcastf32Toi32(LHS, DAG), Mask);
5739 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5740 bitcastf32Toi32(RHS, DAG), Mask);
5741 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5742 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5743 Cmp);
5744 }
5745
5746 SDValue LHS1, LHS2;
5747 SDValue RHS1, RHS2;
5748 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5749 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5750 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5751 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5752 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5753 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5754 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5755 return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
5756 }
5757
5758 return SDValue();
5759}
5760
5761SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5762 SDValue Chain = Op.getOperand(0);
5763 SDValue Cond = Op.getOperand(1);
5764 SDValue Dest = Op.getOperand(2);
5765 SDLoc dl(Op);
5766
5767 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5768 // instruction.
5769 unsigned Opc = Cond.getOpcode();
5770 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5771 !Subtarget->isThumb1Only();
5772 if (Cond.getResNo() == 1 &&
5773 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5774 Opc == ISD::USUBO || OptimizeMul)) {
5775 // Only lower legal XALUO ops.
5776 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5777 return SDValue();
5778
5779 // The actual operation with overflow check.
5780 SDValue Value, OverflowCmp;
5781 SDValue ARMcc;
5782 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5783
5784 // Reverse the condition code.
5785 ARMCC::CondCodes CondCode =
5786 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5787 CondCode = ARMCC::getOppositeCondition(CondCode);
5788 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5789
5790 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5791 OverflowCmp);
5792 }
5793
5794 return SDValue();
5795}
5796
5797SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5798 SDValue Chain = Op.getOperand(0);
5799 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5800 SDValue LHS = Op.getOperand(2);
5801 SDValue RHS = Op.getOperand(3);
5802 SDValue Dest = Op.getOperand(4);
5803 SDLoc dl(Op);
5804
5805 if (isUnsupportedFloatingType(LHS.getValueType())) {
5806 DAG.getTargetLoweringInfo().softenSetCCOperands(
5807 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5808
5809 // If softenSetCCOperands only returned one value, we should compare it to
5810 // zero.
5811 if (!RHS.getNode()) {
5812 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5813 CC = ISD::SETNE;
5814 }
5815 }
5816
5817 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5818 // instruction.
5819 unsigned Opc = LHS.getOpcode();
5820 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5821 !Subtarget->isThumb1Only();
5822 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5823 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5824 Opc == ISD::USUBO || OptimizeMul) &&
5825 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5826 // Only lower legal XALUO ops.
5827 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5828 return SDValue();
5829
5830 // The actual operation with overflow check.
5831 SDValue Value, OverflowCmp;
5832 SDValue ARMcc;
5833 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5834
5835 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5836 // Reverse the condition code.
5837 ARMCC::CondCodes CondCode =
5838 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5839 CondCode = ARMCC::getOppositeCondition(CondCode);
5840 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5841 }
5842
5843 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5844 OverflowCmp);
5845 }
5846
5847 if (LHS.getValueType() == MVT::i32) {
5848 SDValue ARMcc;
5849 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5850 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
5851 }
5852
5853 if (getTargetMachine().Options.UnsafeFPMath &&
5854 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5855 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5856 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5857 return Result;
5858 }
5859
5860 ARMCC::CondCodes CondCode, CondCode2;
5861 FPCCToARMCC(CC, CondCode, CondCode2);
5862
5863 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5864 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5865 SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
5866 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5867 if (CondCode2 != ARMCC::AL) {
5868 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5869 SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
5870 Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5871 }
5872 return Res;
5873}
5874
5875SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5876 SDValue Chain = Op.getOperand(0);
5877 SDValue Table = Op.getOperand(1);
5878 SDValue Index = Op.getOperand(2);
5879 SDLoc dl(Op);
5880
5881 EVT PTy = getPointerTy(DAG.getDataLayout());
5882 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5883 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5884 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5885 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5886 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5887 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5888 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
5889 // table, which does another jump to the destination. This also makes it easier
5890 // to translate it to TBB / TBH later (Thumb2 only).
5891 // FIXME: This might not work if the function is extremely large.
5892 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5893 Addr, Op.getOperand(2), JTI);
5894 }
5895 if (isPositionIndependent() || Subtarget->isROPI()) {
5896 Addr =
5897 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5898 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5899 Chain = Addr.getValue(1);
5900 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5901 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5902 } else {
5903 Addr =
5904 DAG.getLoad(PTy, dl, Chain, Addr,
5905 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5906 Chain = Addr.getValue(1);
5907 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5908 }
5909}
5910
5911static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5912 EVT VT = Op.getValueType();
5913 SDLoc dl(Op);
5914
5915 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5916 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5917 return Op;
5918 return DAG.UnrollVectorOp(Op.getNode());
5919 }
5920
5921 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5922
5923 EVT NewTy;
5924 const EVT OpTy = Op.getOperand(0).getValueType();
5925 if (OpTy == MVT::v4f32)
5926 NewTy = MVT::v4i32;
5927 else if (OpTy == MVT::v4f16 && HasFullFP16)
5928 NewTy = MVT::v4i16;
5929 else if (OpTy == MVT::v8f16 && HasFullFP16)
5930 NewTy = MVT::v8i16;
5931 else
5932 llvm_unreachable("Invalid type for custom lowering!");
5933
5934 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5935 return DAG.UnrollVectorOp(Op.getNode());
5936
5937 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5938 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5939}
5940
5941SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5942 EVT VT = Op.getValueType();
5943 if (VT.isVector())
5944 return LowerVectorFP_TO_INT(Op, DAG);
5945
5946 bool IsStrict = Op->isStrictFPOpcode();
5947 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5948
5949 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5950 RTLIB::Libcall LC;
5951 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5952 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5953 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5954 Op.getValueType());
5955 else
5956 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5957 Op.getValueType());
5958 SDLoc Loc(Op);
5959 MakeLibCallOptions CallOptions;
5960 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5961 SDValue Result;
5962 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5963 CallOptions, Loc, Chain);
5964 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5965 }
5966
5967 // FIXME: Remove this when we have strict fp instruction selection patterns
5968 if (IsStrict) {
5969 SDLoc Loc(Op);
5970 SDValue Result =
5971 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5972 : ISD::FP_TO_UINT,
5973 Loc, Op.getValueType(), SrcVal);
5974 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5975 }
5976
5977 return Op;
5978}
5979
5980static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5981 const ARMSubtarget *Subtarget) {
5982 EVT VT = Op.getValueType();
5983 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5984 EVT FromVT = Op.getOperand(0).getValueType();
5985
5986 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5987 return Op;
5988 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5989 Subtarget->hasFP64())
5990 return Op;
5991 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5992 Subtarget->hasFullFP16())
5993 return Op;
5994 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5995 Subtarget->hasMVEFloatOps())
5996 return Op;
5997 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5998 Subtarget->hasMVEFloatOps())
5999 return Op;
6000
6001 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
6002 return SDValue();
6003
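// For the remaining vector cases, convert with saturation at the full element
// width and then clamp to the requested width below. For a signed saturate to
// i16, BW == 15 and the result is clamped to [-32768, 32767]; an unsigned
// saturate only needs the upper bound (1 << BW) - 1.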
6004 SDLoc DL(Op);
6005 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
6006 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
6007 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
6008 DAG.getValueType(VT.getScalarType()));
6009 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
6010 DAG.getConstant((1 << BW) - 1, DL, VT));
6011 if (IsSigned)
6012 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
6013 DAG.getSignedConstant(-(1 << BW), DL, VT));
6014 return Max;
6015}
6016
6017static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
6018 EVT VT = Op.getValueType();
6019 SDLoc dl(Op);
6020
6021 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
6022 if (VT.getVectorElementType() == MVT::f32)
6023 return Op;
6024 return DAG.UnrollVectorOp(Op.getNode());
6025 }
6026
6027 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
6028 Op.getOperand(0).getValueType() == MVT::v8i16) &&
6029 "Invalid type for custom lowering!");
6030
6031 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
6032
6033 EVT DestVecType;
6034 if (VT == MVT::v4f32)
6035 DestVecType = MVT::v4i32;
6036 else if (VT == MVT::v4f16 && HasFullFP16)
6037 DestVecType = MVT::v4i16;
6038 else if (VT == MVT::v8f16 && HasFullFP16)
6039 DestVecType = MVT::v8i16;
6040 else
6041 return DAG.UnrollVectorOp(Op.getNode());
6042
6043 unsigned CastOpc;
6044 unsigned Opc;
6045 switch (Op.getOpcode()) {
6046 default: llvm_unreachable("Invalid opcode!");
6047 case ISD::SINT_TO_FP:
6048 CastOpc = ISD::SIGN_EXTEND;
6049 Opc = ISD::SINT_TO_FP;
6050 break;
6051 case ISD::UINT_TO_FP:
6052 CastOpc = ISD::ZERO_EXTEND;
6053 Opc = ISD::UINT_TO_FP;
6054 break;
6055 }
6056
6057 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
6058 return DAG.getNode(Opc, dl, VT, Op);
6059}
6060
6061SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
6062 EVT VT = Op.getValueType();
6063 if (VT.isVector())
6064 return LowerVectorINT_TO_FP(Op, DAG);
6065 if (isUnsupportedFloatingType(VT)) {
6066 RTLIB::Libcall LC;
6067 if (Op.getOpcode() == ISD::SINT_TO_FP)
6068 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6069 Op.getValueType());
6070 else
6071 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6072 Op.getValueType());
6073 MakeLibCallOptions CallOptions;
6074 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6075 CallOptions, SDLoc(Op)).first;
6076 }
6077
6078 return Op;
6079}
6080
6081SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6082 // Implement fcopysign with a fabs and a conditional fneg.
6083 SDValue Tmp0 = Op.getOperand(0);
6084 SDValue Tmp1 = Op.getOperand(1);
6085 SDLoc dl(Op);
6086 EVT VT = Op.getValueType();
6087 EVT SrcVT = Tmp1.getValueType();
6088 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6089 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6090 bool UseNEON = !InGPR && Subtarget->hasNEON();
6091
6092 if (UseNEON) {
6093 // Use VBSL to copy the sign bit.
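// The (0x6, 0x80) modified immediate encodes 0x80000000 in each 32-bit lane,
// i.e. a mask of just the sign bit (shifted up to bit 63 below for f64). The
// result is (Tmp1 & Mask) | (Tmp0 & ~Mask): the sign of operand 1 combined
// with the magnitude of operand 0.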
6094 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6095 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6096 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6097 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6098 if (VT == MVT::f64)
6099 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6100 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6101 DAG.getConstant(32, dl, MVT::i32));
6102 else /*if (VT == MVT::f32)*/
6103 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6104 if (SrcVT == MVT::f32) {
6105 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6106 if (VT == MVT::f64)
6107 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6108 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6109 DAG.getConstant(32, dl, MVT::i32));
6110 } else if (VT == MVT::f32)
6111 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6112 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6113 DAG.getConstant(32, dl, MVT::i32));
6114 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6115 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6116
6117 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6118 dl, MVT::i32);
6119 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6120 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6121 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6122
6123 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6124 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6125 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6126 if (VT == MVT::f32) {
6127 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6128 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6129 DAG.getConstant(0, dl, MVT::i32));
6130 } else {
6131 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6132 }
6133
6134 return Res;
6135 }
6136
6137 // Bitcast operand 1 to i32.
6138 if (SrcVT == MVT::f64)
6139 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6140 Tmp1).getValue(1);
6141 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6142
6143 // Or in the signbit with integer operations.
6144 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6145 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6146 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6147 if (VT == MVT::f32) {
6148 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6149 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6150 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6151 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6152 }
6153
6154 // f64: Or the high part with signbit and then combine two parts.
6155 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6156 Tmp0);
6157 SDValue Lo = Tmp0.getValue(0);
6158 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6159 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6160 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6161}
6162
6163SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6164 MachineFunction &MF = DAG.getMachineFunction();
6165 MachineFrameInfo &MFI = MF.getFrameInfo();
6166 MFI.setReturnAddressIsTaken(true);
6167
6168 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6169 return SDValue();
6170
6171 EVT VT = Op.getValueType();
6172 SDLoc dl(Op);
6173 unsigned Depth = Op.getConstantOperandVal(0);
6174 if (Depth) {
6175 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6176 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6177 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6178 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6179 MachinePointerInfo());
6180 }
6181
6182 // Return LR, which contains the return address. Mark it an implicit live-in.
6183 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6184 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6185}
6186
6187SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6188 const ARMBaseRegisterInfo &ARI =
6189 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6190 MachineFunction &MF = DAG.getMachineFunction();
6191 MachineFrameInfo &MFI = MF.getFrameInfo();
6192 MFI.setFrameAddressIsTaken(true);
6193
6194 EVT VT = Op.getValueType();
6195 SDLoc dl(Op); // FIXME probably not meaningful
6196 unsigned Depth = Op.getConstantOperandVal(0);
6197 Register FrameReg = ARI.getFrameRegister(MF);
6198 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6199 while (Depth--)
6200 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6201 MachinePointerInfo());
6202 return FrameAddr;
6203}
6204
6205// FIXME? Maybe this could be a TableGen attribute on some registers and
6206// this table could be generated automatically from RegInfo.
6207Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6208 const MachineFunction &MF) const {
6209 Register Reg = StringSwitch<unsigned>(RegName)
6210 .Case("sp", ARM::SP)
6211 .Default(0);
6212 if (Reg)
6213 return Reg;
6214 report_fatal_error(Twine("Invalid register name \""
6215 + StringRef(RegName) + "\"."));
6216}
6217
6218// Result is 64 bit value so split into two 32 bit values and return as a
6219// pair of values.
6220static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6221 SelectionDAG &DAG) {
6222 SDLoc DL(N);
6223
6224 // This function is only supposed to be called for i64 type destination.
6225 assert(N->getValueType(0) == MVT::i64
6226 && "ExpandREAD_REGISTER called for non-i64 type result.");
6227
6228 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6229 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6230 N->getOperand(0),
6231 N->getOperand(1));
6232
6233 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6234 Read.getValue(1)));
6235 Results.push_back(Read.getOperand(0));
6236}
6237
6238/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6239/// When \p DstVT, the destination type of \p BC, is on the vector
6240/// register bank and the source of bitcast, \p Op, operates on the same bank,
6241/// it might be possible to combine them, such that everything stays on the
6242/// vector register bank.
6243/// \return The node that would replace \p BC, if the combine
6244/// is possible.
6245static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6246 SelectionDAG &DAG) {
6247 SDValue Op = BC->getOperand(0);
6248 EVT DstVT = BC->getValueType(0);
6249
6250 // The only vector instruction that can produce a scalar (remember,
6251 // since the bitcast was about to be turned into VMOVDRR, the source
6252 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6253 // Moreover, we can do this combine only if there is one use.
6254 // Finally, if the destination type is not a vector, there is not
6255 // much point on forcing everything on the vector bank.
6256 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6257 !Op.hasOneUse())
6258 return SDValue();
6259
6260 // If the index is not constant, we will introduce an additional
6261 // multiply that will stick.
6262 // Give up in that case.
6263 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6264 if (!Index)
6265 return SDValue();
6266 unsigned DstNumElt = DstVT.getVectorNumElements();
6267
6268 // Compute the new index.
6269 const APInt &APIntIndex = Index->getAPIntValue();
6270 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6271 NewIndex *= APIntIndex;
6272 // Check if the new constant index fits into i32.
6273 if (NewIndex.getBitWidth() > 32)
6274 return SDValue();
6275
6276 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6277 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6278 SDLoc dl(Op);
6279 SDValue ExtractSrc = Op.getOperand(0);
6280 EVT VecVT = EVT::getVectorVT(
6281 *DAG.getContext(), DstVT.getScalarType(),
6282 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6283 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6284 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6285 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6286}
6287
6288/// ExpandBITCAST - If the target supports VFP, this function is called to
6289/// expand a bit convert where either the source or destination type is i64 to
6290/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6291/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6292/// vectors), since the legalizer won't know what to do with that.
6293SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6294 const ARMSubtarget *Subtarget) const {
6295 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6296 SDLoc dl(N);
6297 SDValue Op = N->getOperand(0);
6298
6299 // This function is only supposed to be called for i16 and i64 types, either
6300 // as the source or destination of the bit convert.
6301 EVT SrcVT = Op.getValueType();
6302 EVT DstVT = N->getValueType(0);
6303
6304 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6305 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6306 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6307 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6308
6309 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6310 (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
6311 if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
6312 Op = DAG.getBitcast(MVT::f16, Op);
6313 return DAG.getNode(
6314 ISD::TRUNCATE, SDLoc(N), DstVT,
6315 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6316 }
6317
6318 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6319 return SDValue();
6320
6321 // Turn i64->f64 into VMOVDRR.
6322 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6323 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6324 // if we can combine the bitcast with its source.
6325 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6326 return Val;
6327 SDValue Lo, Hi;
6328 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6329 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6330 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6331 }
6332
6333 // Turn f64->i64 into VMOVRRD.
6334 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6335 SDValue Cvt;
6336 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6337 SrcVT.getVectorNumElements() > 1)
6338 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6339 DAG.getVTList(MVT::i32, MVT::i32),
6340 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6341 else
6342 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6343 DAG.getVTList(MVT::i32, MVT::i32), Op);
6344 // Merge the pieces into a single i64 value.
6345 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6346 }
6347
6348 return SDValue();
6349}
6350
6351/// getZeroVector - Returns a vector of specified type with all zero elements.
6352/// Zero vectors are used to represent vector negation and in those cases
6353/// will be implemented with the NEON VNEG instruction. However, VNEG does
6354/// not support i64 elements, so sometimes the zero vectors will need to be
6355/// explicitly constructed. Regardless, use a canonical VMOV to create the
6356/// zero vector.
6357static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6358 assert(VT.isVector() && "Expected a vector type");
6359 // The canonical modified immediate encoding of a zero vector is....0!
6360 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6361 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6362 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6363 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6364}
6365
6366/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
6367/// i32 values and take a 2 x i32 value to shift plus a shift amount.
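/// For shift amounts below 32, Lo = (Lo >> Amt) | (Hi << (32 - Amt)) and
/// Hi = Hi >> Amt; for amounts of 32 or more, Lo = Hi >> (Amt - 32) and Hi is
/// the sign (SRA) or zero (SRL) fill. The two cases are selected with CMOVs
/// keyed on the sign of Amt - 32.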
6368SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6369 SelectionDAG &DAG) const {
6370 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6371 EVT VT = Op.getValueType();
6372 unsigned VTBits = VT.getSizeInBits();
6373 SDLoc dl(Op);
6374 SDValue ShOpLo = Op.getOperand(0);
6375 SDValue ShOpHi = Op.getOperand(1);
6376 SDValue ShAmt = Op.getOperand(2);
6377 SDValue ARMcc;
6378 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6379
6380 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6381
6382 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6383 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6384 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6385 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6386 DAG.getConstant(VTBits, dl, MVT::i32));
6387 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6388 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6389 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6390 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6391 ISD::SETGE, ARMcc, DAG, dl);
6392 SDValue Lo =
6393 DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
6394
6395 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6396 SDValue HiBigShift = Opc == ISD::SRA
6397 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6398 DAG.getConstant(VTBits - 1, dl, VT))
6399 : DAG.getConstant(0, dl, VT);
6400 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6401 ISD::SETGE, ARMcc, DAG, dl);
6402 SDValue Hi =
6403 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6404
6405 SDValue Ops[2] = { Lo, Hi };
6406 return DAG.getMergeValues(Ops, dl);
6407}
6408
6409/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6410/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6411SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6412 SelectionDAG &DAG) const {
6413 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6414 EVT VT = Op.getValueType();
6415 unsigned VTBits = VT.getSizeInBits();
6416 SDLoc dl(Op);
6417 SDValue ShOpLo = Op.getOperand(0);
6418 SDValue ShOpHi = Op.getOperand(1);
6419 SDValue ShAmt = Op.getOperand(2);
6420 SDValue ARMcc;
6421
6422 assert(Op.getOpcode() == ISD::SHL_PARTS);
6423 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6424 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6425 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6426 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6427 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6428
6429 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6430 DAG.getConstant(VTBits, dl, MVT::i32));
6431 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6432 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6433 ISD::SETGE, ARMcc, DAG, dl);
6434 SDValue Hi =
6435 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6436
6437 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6438 ISD::SETGE, ARMcc, DAG, dl);
6439 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6440 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6441 DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
6442
6443 SDValue Ops[2] = { Lo, Hi };
6444 return DAG.getMergeValues(Ops, dl);
6445}
6446
6447SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6448 SelectionDAG &DAG) const {
6449 // The rounding mode is in bits 23:22 of the FPSCR.
6450 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6451 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
6452 // so that the shift + and get folded into a bitfield extract.
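// For example, an FPSCR rounding field of 0b11 (round toward zero) gives
// ((0b11 << 22) + (1 << 22)) >> 22 = 0b100, and masking with 3 yields 0, the
// FLT_ROUNDS value for round-toward-zero.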
6453 SDLoc dl(Op);
6454 SDValue Chain = Op.getOperand(0);
6455 SDValue Ops[] = {Chain,
6456 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6457
6458 SDValue FPSCR =
6459 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6460 Chain = FPSCR.getValue(1);
6461 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6462 DAG.getConstant(1U << 22, dl, MVT::i32));
6463 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6464 DAG.getConstant(22, dl, MVT::i32));
6465 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6466 DAG.getConstant(3, dl, MVT::i32));
6467 return DAG.getMergeValues({And, Chain}, dl);
6468}
6469
6470SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6471 SelectionDAG &DAG) const {
6472 SDLoc DL(Op);
6473 SDValue Chain = Op->getOperand(0);
6474 SDValue RMValue = Op->getOperand(1);
6475
6476 // The rounding mode is in bits 23:22 of the FPSCR.
6477 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6478 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6479 // (((arg - 1) & 3) << 22).
6480 //
6481 // It is expected that the argument of llvm.set.rounding is within the
6482 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6483 // responsibility of the code that generates llvm.set.rounding to ensure this
6484 // condition.
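//
// For example, llvm.set.rounding(0) (round toward zero) yields
// ((0 - 1) & 3) == 3, the ARM RZ encoding, and llvm.set.rounding(1)
// (round to nearest) yields 0, the ARM RN encoding.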
6485
6486 // Calculate new value of FPSCR[23:22].
6487 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6488 DAG.getConstant(1, DL, MVT::i32));
6489 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6490 DAG.getConstant(0x3, DL, MVT::i32));
6491 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6492 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6493
6494 // Get current value of FPSCR.
6495 SDValue Ops[] = {Chain,
6496 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6497 SDValue FPSCR =
6498 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6499 Chain = FPSCR.getValue(1);
6500 FPSCR = FPSCR.getValue(0);
6501
6502 // Put new rounding mode into FPSCR[23:22].
6503 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6504 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6505 DAG.getConstant(RMMask, DL, MVT::i32));
6506 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6507 SDValue Ops2[] = {
6508 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6509 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6510}
6511
6512SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6513 SelectionDAG &DAG) const {
6514 SDLoc DL(Op);
6515 SDValue Chain = Op->getOperand(0);
6516 SDValue Mode = Op->getOperand(1);
6517
6518 // Generate nodes to build:
6519 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6520 SDValue Ops[] = {Chain,
6521 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6522 SDValue FPSCR =
6523 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6524 Chain = FPSCR.getValue(1);
6525 FPSCR = FPSCR.getValue(0);
6526
6527 SDValue FPSCRMasked =
6528 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6529 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6530 SDValue InputMasked =
6531 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6532 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6533 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6534
6535 SDValue Ops2[] = {
6536 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6537 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6538}
6539
6540SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6541 SelectionDAG &DAG) const {
6542 SDLoc DL(Op);
6543 SDValue Chain = Op->getOperand(0);
6544
6545 // To get the default FP mode all control bits are cleared:
6546 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6547 SDValue Ops[] = {Chain,
6548 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6549 SDValue FPSCR =
6550 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6551 Chain = FPSCR.getValue(1);
6552 FPSCR = FPSCR.getValue(0);
6553
6554 SDValue FPSCRMasked = DAG.getNode(
6555 ISD::AND, DL, MVT::i32, FPSCR,
6556 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6557 SDValue Ops2[] = {Chain,
6558 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6559 FPSCRMasked};
6560 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6561}
6562
6563static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6564 const ARMSubtarget *ST) {
6565 SDLoc dl(N);
6566 EVT VT = N->getValueType(0);
6567 if (VT.isVector() && ST->hasNEON()) {
6568
6569 // Compute the least significant set bit: LSB = X & -X
6570 SDValue X = N->getOperand(0);
6571 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6572 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6573
6574 EVT ElemTy = VT.getVectorElementType();
6575
6576 if (ElemTy == MVT::i8) {
6577 // Compute with: cttz(x) = ctpop(lsb - 1)
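// For example, x = 0b01011000: lsb = 0b1000, lsb - 1 = 0b0111, and
// ctpop(0b0111) = 3 = cttz(x).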
6578 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6579 DAG.getTargetConstant(1, dl, ElemTy));
6580 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6581 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6582 }
6583
6584 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6585 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6586 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6587 unsigned NumBits = ElemTy.getSizeInBits();
6588 SDValue WidthMinus1 =
6589 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6590 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6591 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6592 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6593 }
6594
6595 // Compute with: cttz(x) = ctpop(lsb - 1)
6596
6597 // Compute LSB - 1.
6598 SDValue Bits;
6599 if (ElemTy == MVT::i64) {
6600 // Load constant 0xffff'ffff'ffff'ffff to register.
6601 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6602 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6603 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6604 } else {
6605 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6606 DAG.getTargetConstant(1, dl, ElemTy));
6607 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6608 }
6609 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6610 }
6611
6612 if (!ST->hasV6T2Ops())
6613 return SDValue();
6614
6615 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6616 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6617}
6618
6619static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6620 const ARMSubtarget *ST) {
6621 EVT VT = N->getValueType(0);
6622 SDLoc DL(N);
6623
6624 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6625 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6626 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6627 "Unexpected type for custom ctpop lowering");
6628
6629 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6630 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6631 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6632 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6633
6634 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
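// For example, a v4i32 ctpop starts as a v16i8 ctpop and is widened with two
// rounds of vpaddlu (pairwise add long): v16i8 -> v8i16 -> v4i32, summing
// adjacent byte counts at each step.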
6635 unsigned EltSize = 8;
6636 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6637 while (EltSize != VT.getScalarSizeInBits()) {
6638 SmallVector<SDValue, 8> Ops;
6639 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6640 TLI.getPointerTy(DAG.getDataLayout())));
6641 Ops.push_back(Res);
6642
6643 EltSize *= 2;
6644 NumElts /= 2;
6645 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6646 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6647 }
6648
6649 return Res;
6650}
6651
6652/// getVShiftImm - Check if this is a valid build_vector for the immediate
6653/// operand of a vector shift operation, where all the elements of the
6654/// build_vector must have the same constant integer value.
6655static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6656 // Ignore bit_converts.
6657 while (Op.getOpcode() == ISD::BITCAST)
6658 Op = Op.getOperand(0);
6659 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6660 APInt SplatBits, SplatUndef;
6661 unsigned SplatBitSize;
6662 bool HasAnyUndefs;
6663 if (!BVN ||
6664 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6665 ElementBits) ||
6666 SplatBitSize > ElementBits)
6667 return false;
6668 Cnt = SplatBits.getSExtValue();
6669 return true;
6670}
6671
6672/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6673/// operand of a vector shift left operation. That value must be in the range:
6674/// 0 <= Value < ElementBits for a left shift; or
6675/// 0 <= Value <= ElementBits for a long left shift.
6676static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6677 assert(VT.isVector() && "vector shift count is not a vector type");
6678 int64_t ElementBits = VT.getScalarSizeInBits();
6679 if (!getVShiftImm(Op, ElementBits, Cnt))
6680 return false;
6681 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6682}
6683
6684/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6685/// operand of a vector shift right operation. For a shift opcode, the value
6686/// is positive, but for an intrinsic the value must be negative. The
6687/// absolute value must be in the range:
6688/// 1 <= |Value| <= ElementBits for a right shift; or
6689/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6690static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6691 int64_t &Cnt) {
6692 assert(VT.isVector() && "vector shift count is not a vector type");
6693 int64_t ElementBits = VT.getScalarSizeInBits();
6694 if (!getVShiftImm(Op, ElementBits, Cnt))
6695 return false;
6696 if (!isIntrinsic)
6697 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6698 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6699 Cnt = -Cnt;
6700 return true;
6701 }
6702 return false;
6703}
6704
6705static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6706 const ARMSubtarget *ST) {
6707 EVT VT = N->getValueType(0);
6708 SDLoc dl(N);
6709 int64_t Cnt;
6710
6711 if (!VT.isVector())
6712 return SDValue();
6713
6714 // We essentially have two forms here. Shift by an immediate and shift by a
6715 // vector register (there is also shift by a gpr, but that is just handled
6716 // with a tablegen pattern). We cannot easily match shift by an immediate in
6717 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6718 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6719 // signed or unsigned, and a negative shift indicates a shift right).
6720 if (N->getOpcode() == ISD::SHL) {
6721 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6722 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6723 DAG.getConstant(Cnt, dl, MVT::i32));
6724 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6725 N->getOperand(1));
6726 }
6727
6728 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6729 "unexpected vector shift opcode");
6730
6731 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6732 unsigned VShiftOpc =
6733 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6734 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6735 DAG.getConstant(Cnt, dl, MVT::i32));
6736 }
6737
6738 // Other right shifts we don't have operations for (we use a shift left by a
6739 // negative number).
6740 EVT ShiftVT = N->getOperand(1).getValueType();
6741 SDValue NegatedCount = DAG.getNode(
6742 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6743 unsigned VShiftOpc =
6744 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6745 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6746}
6747
6748static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6749 const ARMSubtarget *ST) {
6750 EVT VT = N->getValueType(0);
6751 SDLoc dl(N);
6752
6753 // We can get here for a node like i32 = ISD::SHL i32, i64
6754 if (VT != MVT::i64)
6755 return SDValue();
6756
6757 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6758 N->getOpcode() == ISD::SHL) &&
6759 "Unknown shift to lower!");
6760
6761 unsigned ShOpc = N->getOpcode();
6762 if (ST->hasMVEIntegerOps()) {
6763 SDValue ShAmt = N->getOperand(1);
6764 unsigned ShPartsOpc = ARMISD::LSLL;
6765 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6766
6767 // If the shift amount is greater than 32 or has a greater bitwidth than 64
6768 // then do the default optimisation
6769 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6770 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6771 return SDValue();
6772
6773 // Extract the lower 32 bits of the shift amount if it's not an i32
6774 if (ShAmt->getValueType(0) != MVT::i32)
6775 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6776
6777 if (ShOpc == ISD::SRL) {
6778 if (!Con)
6779 // There is no t2LSRLr instruction so negate and perform an lsll if the
6780 // shift amount is in a register, emulating a right shift.
6781 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6782 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6783 else
6784 // Else generate an lsrl on the immediate shift amount
6785 ShPartsOpc = ARMISD::LSRL;
6786 } else if (ShOpc == ISD::SRA)
6787 ShPartsOpc = ARMISD::ASRL;
6788
6789 // Split Lower/Upper 32 bits of the destination/source
6790 SDValue Lo, Hi;
6791 std::tie(Lo, Hi) =
6792 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6793 // Generate the shift operation as computed above
6794 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6795 ShAmt);
6796 // The upper 32 bits come from the second return value of lsll
6797 Hi = SDValue(Lo.getNode(), 1);
6798 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6799 }
6800
6801 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6802 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6803 return SDValue();
6804
6805 // If we are in thumb mode, we don't have RRX.
6806 if (ST->isThumb1Only())
6807 return SDValue();
6808
6809 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6810 SDValue Lo, Hi;
6811 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6812
6813 // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
6814 // captures the shifted out bit into a carry flag.
6815 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6816 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
6817
6818 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6819 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6820
6821 // Merge the pieces into a single i64 value.
6822 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6823}
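// Illustrative sketch of the 64-bit path above: without MVE, "i64 x >> 1"
// becomes the two flag-linked nodes built here:
//   hi' = ARMISD::LSRS1 hi            ; hi >> 1, shifted-out bit -> flags
//   lo' = ARMISD::RRX lo, flags       ; lo >> 1 with the carry rotated in
//   res = BUILD_PAIR lo', hi'
// With MVE and a register shift amount n, an SRL instead becomes
// ARMISD::LSLL(lo, hi, 0 - n), since there is no register-operand LSRL.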
6824
6825static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6826 const ARMSubtarget *ST) {
6827 bool Invert = false;
6828 bool Swap = false;
6829 unsigned Opc = ARMCC::AL;
6830
6831 SDValue Op0 = Op.getOperand(0);
6832 SDValue Op1 = Op.getOperand(1);
6833 SDValue CC = Op.getOperand(2);
6834 EVT VT = Op.getValueType();
6835 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6836 SDLoc dl(Op);
6837
6838 EVT CmpVT;
6839 if (ST->hasNEON())
6840 CmpVT = VT.changeVectorElementTypeToInteger();
6841 else {
6842 assert(ST->hasMVEIntegerOps() &&
6843 "No hardware support for integer vector comparison!");
6844
6845 if (Op.getValueType().getVectorElementType() != MVT::i1)
6846 return SDValue();
6847
6848 // Make sure we expand floating point setcc to scalar if we do not have
6849 // mve.fp, so that we can handle them from there.
6850 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6851 return SDValue();
6852
6853 CmpVT = VT;
6854 }
6855
6856 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6857 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6858 // Special-case integer 64-bit equality comparisons. They aren't legal,
6859 // but they can be lowered with a few vector instructions.
6860 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6861 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6862 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6863 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6864 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6865 DAG.getCondCode(ISD::SETEQ));
6866 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6867 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6868 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6869 if (SetCCOpcode == ISD::SETNE)
6870 Merged = DAG.getNOT(dl, Merged, CmpVT);
6871 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6872 return Merged;
6873 }
6874
6875 if (CmpVT.getVectorElementType() == MVT::i64)
6876 // 64-bit comparisons are not legal in general.
6877 return SDValue();
6878
6879 if (Op1.getValueType().isFloatingPoint()) {
6880 switch (SetCCOpcode) {
6881 default: llvm_unreachable("Illegal FP comparison");
6882 case ISD::SETUNE:
6883 case ISD::SETNE:
6884 if (ST->hasMVEFloatOps()) {
6885 Opc = ARMCC::NE; break;
6886 } else {
6887 Invert = true; [[fallthrough]];
6888 }
6889 case ISD::SETOEQ:
6890 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6891 case ISD::SETOLT:
6892 case ISD::SETLT: Swap = true; [[fallthrough]];
6893 case ISD::SETOGT:
6894 case ISD::SETGT: Opc = ARMCC::GT; break;
6895 case ISD::SETOLE:
6896 case ISD::SETLE: Swap = true; [[fallthrough]];
6897 case ISD::SETOGE:
6898 case ISD::SETGE: Opc = ARMCC::GE; break;
6899 case ISD::SETUGE: Swap = true; [[fallthrough]];
6900 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6901 case ISD::SETUGT: Swap = true; [[fallthrough]];
6902 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6903 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6904 case ISD::SETONE: {
6905 // Expand this to (OLT | OGT).
6906 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6907 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6908 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6909 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6910 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6911 if (Invert)
6912 Result = DAG.getNOT(dl, Result, VT);
6913 return Result;
6914 }
6915 case ISD::SETUO: Invert = true; [[fallthrough]];
6916 case ISD::SETO: {
6917 // Expand this to (OLT | OGE).
6918 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6919 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6920 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6921 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6922 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6923 if (Invert)
6924 Result = DAG.getNOT(dl, Result, VT);
6925 return Result;
6926 }
6927 }
6928 } else {
6929 // Integer comparisons.
6930 switch (SetCCOpcode) {
6931 default: llvm_unreachable("Illegal integer comparison");
6932 case ISD::SETNE:
6933 if (ST->hasMVEIntegerOps()) {
6934 Opc = ARMCC::NE; break;
6935 } else {
6936 Invert = true; [[fallthrough]];
6937 }
6938 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6939 case ISD::SETLT: Swap = true; [[fallthrough]];
6940 case ISD::SETGT: Opc = ARMCC::GT; break;
6941 case ISD::SETLE: Swap = true; [[fallthrough]];
6942 case ISD::SETGE: Opc = ARMCC::GE; break;
6943 case ISD::SETULT: Swap = true; [[fallthrough]];
6944 case ISD::SETUGT: Opc = ARMCC::HI; break;
6945 case ISD::SETULE: Swap = true; [[fallthrough]];
6946 case ISD::SETUGE: Opc = ARMCC::HS; break;
6947 }
6948
6949 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6950 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6951 SDValue AndOp;
6952 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6953 AndOp = Op0;
6954 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6955 AndOp = Op1;
6956
6957 // Ignore bitconvert.
6958 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6959 AndOp = AndOp.getOperand(0);
6960
6961 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6962 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6963 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6964 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6965 if (!Invert)
6966 Result = DAG.getNOT(dl, Result, VT);
6967 return Result;
6968 }
6969 }
6970 }
6971
6972 if (Swap)
6973 std::swap(Op0, Op1);
6974
6975 // If one of the operands is a constant vector zero, attempt to fold the
6976 // comparison to a specialized compare-against-zero form.
6977 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6978 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6979 Opc == ARMCC::NE)) {
6980 if (Opc == ARMCC::GE)
6981 Opc = ARMCC::LE;
6982 else if (Opc == ARMCC::GT)
6983 Opc = ARMCC::LT;
6984 std::swap(Op0, Op1);
6985 }
6986
6987 SDValue Result;
6988 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6989 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6990 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6991 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6992 DAG.getConstant(Opc, dl, MVT::i32));
6993 else
6994 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6995 DAG.getConstant(Opc, dl, MVT::i32));
6996
6997 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6998
6999 if (Invert)
7000 Result = DAG.getNOT(dl, Result, VT);
7001
7002 return Result;
7003}
7004
7005static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
7006 SDValue LHS = Op.getOperand(0);
7007 SDValue RHS = Op.getOperand(1);
7008 SDValue Carry = Op.getOperand(2);
7009 SDValue Cond = Op.getOperand(3);
7010 SDLoc DL(Op);
7011
7012 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
7013
7014 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
7015 // have to invert the carry first.
7016 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7017 DAG.getConstant(1, DL, MVT::i32), Carry);
7018 // This converts the boolean value carry into the carry flag.
7019 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7020
7021 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
7022 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
7023
7024 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
7025 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
7026 SDValue ARMcc = DAG.getConstant(
7027 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
7028 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
7029 Cmp.getValue(1));
7030}
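// Illustrative sketch of the SETCCCARRY lowering above, e.g. for
// setcccarry(a, b, borrow, setult):
//   carry = 1 - borrow                       ; invert into ARM's convention
//   cmp   = ARMISD::SUBE a, b, carry         ; subtract-with-carry, sets flags
//   res   = ARMISD::CMOV 0, 1, LO, cmp.flags ; select 1 when LO holds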
7031
7032/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7033/// valid vector constant for a NEON or MVE instruction with a "modified
7034/// immediate" operand (e.g., VMOV). If so, return the encoded value.
7035static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
7036 unsigned SplatBitSize, SelectionDAG &DAG,
7037 const SDLoc &dl, EVT &VT, EVT VectorVT,
7038 VMOVModImmType type) {
7039 unsigned OpCmode, Imm;
7040 bool is128Bits = VectorVT.is128BitVector();
7041
7042 // SplatBitSize is set to the smallest size that splats the vector, so a
7043 // zero vector will always have SplatBitSize == 8. However, NEON modified
7044 // immediate instructions other than VMOV do not support the 8-bit encoding
7045 // of a zero vector, and the default encoding of zero is supposed to be the
7046 // 32-bit version.
7047 if (SplatBits == 0)
7048 SplatBitSize = 32;
7049
7050 switch (SplatBitSize) {
7051 case 8:
7052 if (type != VMOVModImm)
7053 return SDValue();
7054 // Any 1-byte value is OK. Op=0, Cmode=1110.
7055 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
7056 OpCmode = 0xe;
7057 Imm = SplatBits;
7058 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
7059 break;
7060
7061 case 16:
7062 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7063 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
7064 if ((SplatBits & ~0xff) == 0) {
7065 // Value = 0x00nn: Op=x, Cmode=100x.
7066 OpCmode = 0x8;
7067 Imm = SplatBits;
7068 break;
7069 }
7070 if ((SplatBits & ~0xff00) == 0) {
7071 // Value = 0xnn00: Op=x, Cmode=101x.
7072 OpCmode = 0xa;
7073 Imm = SplatBits >> 8;
7074 break;
7075 }
7076 return SDValue();
7077
7078 case 32:
7079 // NEON's 32-bit VMOV supports splat values where:
7080 // * only one byte is nonzero, or
7081 // * the least significant byte is 0xff and the second byte is nonzero, or
7082 // * the least significant 2 bytes are 0xff and the third is nonzero.
7083 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7084 if ((SplatBits & ~0xff) == 0) {
7085 // Value = 0x000000nn: Op=x, Cmode=000x.
7086 OpCmode = 0;
7087 Imm = SplatBits;
7088 break;
7089 }
7090 if ((SplatBits & ~0xff00) == 0) {
7091 // Value = 0x0000nn00: Op=x, Cmode=001x.
7092 OpCmode = 0x2;
7093 Imm = SplatBits >> 8;
7094 break;
7095 }
7096 if ((SplatBits & ~0xff0000) == 0) {
7097 // Value = 0x00nn0000: Op=x, Cmode=010x.
7098 OpCmode = 0x4;
7099 Imm = SplatBits >> 16;
7100 break;
7101 }
7102 if ((SplatBits & ~0xff000000) == 0) {
7103 // Value = 0xnn000000: Op=x, Cmode=011x.
7104 OpCmode = 0x6;
7105 Imm = SplatBits >> 24;
7106 break;
7107 }
7108
7109 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7110 if (type == OtherModImm) return SDValue();
7111
7112 if ((SplatBits & ~0xffff) == 0 &&
7113 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7114 // Value = 0x0000nnff: Op=x, Cmode=1100.
7115 OpCmode = 0xc;
7116 Imm = SplatBits >> 8;
7117 break;
7118 }
7119
7120 // cmode == 0b1101 is not supported for MVE VMVN
7121 if (type == MVEVMVNModImm)
7122 return SDValue();
7123
7124 if ((SplatBits & ~0xffffff) == 0 &&
7125 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7126 // Value = 0x00nnffff: Op=x, Cmode=1101.
7127 OpCmode = 0xd;
7128 Imm = SplatBits >> 16;
7129 break;
7130 }
7131
7132 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7133 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7134 // VMOV.I32. A (very) minor optimization would be to replicate the value
7135 // and fall through here to test for a valid 64-bit splat. But, then the
7136 // caller would also need to check and handle the change in size.
7137 return SDValue();
7138
7139 case 64: {
7140 if (type != VMOVModImm)
7141 return SDValue();
7142 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7143 uint64_t BitMask = 0xff;
7144 unsigned ImmMask = 1;
7145 Imm = 0;
7146 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7147 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7148 Imm |= ImmMask;
7149 } else if ((SplatBits & BitMask) != 0) {
7150 return SDValue();
7151 }
7152 BitMask <<= 8;
7153 ImmMask <<= 1;
7154 }
7155
7156 // Op=1, Cmode=1110.
7157 OpCmode = 0x1e;
7158 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7159 break;
7160 }
7161
7162 default:
7163 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7164 }
7165
7166 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7167 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7168}
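// Worked example of the encoding above, assuming a 128-bit vector type: a
// splat of 0x00004200 with SplatBitSize == 32 hits the "0x0000nn00" case,
// giving OpCmode = 0x2, Imm = 0x42 and VT = v4i32, i.e. the modified
// immediate used by "vmov.i32 qd, #0x4200".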
7169
7170SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7171 const ARMSubtarget *ST) const {
7172 EVT VT = Op.getValueType();
7173 bool IsDouble = (VT == MVT::f64);
7174 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7175 const APFloat &FPVal = CFP->getValueAPF();
7176
7177 // Prevent floating-point constants from using literal loads
7178 // when execute-only is enabled.
7179 if (ST->genExecuteOnly()) {
7180 // We shouldn't trigger this for v6m execute-only
7181 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7182 "Unexpected architecture");
7183
7184 // If we can represent the constant as an immediate, don't lower it
7185 if (isFPImmLegal(FPVal, VT))
7186 return Op;
7187 // Otherwise, construct as integer, and move to float register
7188 APInt INTVal = FPVal.bitcastToAPInt();
7189 SDLoc DL(CFP);
7190 switch (VT.getSimpleVT().SimpleTy) {
7191 default:
7192 llvm_unreachable("Unknown floating point type!");
7193 break;
7194 case MVT::f64: {
7195 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7196 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7197 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7198 }
7199 case MVT::f32:
7200 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7201 DAG.getConstant(INTVal, DL, MVT::i32));
7202 }
7203 }
7204
7205 if (!ST->hasVFP3Base())
7206 return SDValue();
7207
7208 // Use the default (constant pool) lowering for double constants when we have
7209 // an SP-only FPU
7210 if (IsDouble && !Subtarget->hasFP64())
7211 return SDValue();
7212
7213 // Try splatting with a VMOV.f32...
7214 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7215
7216 if (ImmVal != -1) {
7217 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7218 // We have code in place to select a valid ConstantFP already, no need to
7219 // do any mangling.
7220 return Op;
7221 }
7222
7223 // It's a float and we are trying to use NEON operations where
7224 // possible. Lower it to a splat followed by an extract.
7225 SDLoc DL(Op);
7226 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7227 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7228 NewVal);
7229 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7230 DAG.getConstant(0, DL, MVT::i32));
7231 }
7232
7233 // The rest of our options are NEON only, make sure that's allowed before
7234 // proceeding..
7235 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7236 return SDValue();
7237
7238 EVT VMovVT;
7239 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7240
7241 // It wouldn't really be worth bothering for doubles except for one very
7242 // important value, which does happen to match: 0.0. So make sure we don't do
7243 // anything stupid.
7244 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7245 return SDValue();
7246
7247 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7248 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7249 VMovVT, VT, VMOVModImm);
7250 if (NewVal != SDValue()) {
7251 SDLoc DL(Op);
7252 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7253 NewVal);
7254 if (IsDouble)
7255 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7256
7257 // It's a float: cast and extract a vector element.
7258 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7259 VecConstant);
7260 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7261 DAG.getConstant(0, DL, MVT::i32));
7262 }
7263
7264 // Finally, try a VMVN.i32
7265 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7266 VT, VMVNModImm);
7267 if (NewVal != SDValue()) {
7268 SDLoc DL(Op);
7269 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7270
7271 if (IsDouble)
7272 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7273
7274 // It's a float: cast and extract a vector element.
7275 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7276 VecConstant);
7277 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7278 DAG.getConstant(0, DL, MVT::i32));
7279 }
7280
7281 return SDValue();
7282}
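// Illustrative examples for the constant-FP lowering above:
//  * 1.0f is encodable by ARM_AM::getFP32Imm, so (unless NEON is preferred
//    for single precision) the ConstantFP is returned unchanged and later
//    selects to a VMOV.F32 immediate.
//  * 0.0 as f64 has two equal 32-bit halves, so it can be built as a
//    VMOVIMM splat of zero bitcast to f64 instead of a literal-pool load.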
7283
7284// Check whether a VEXT instruction can handle the shuffle mask when the
7285// vector sources of the shuffle are the same.
7286static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7287 unsigned NumElts = VT.getVectorNumElements();
7288
7289 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7290 if (M[0] < 0)
7291 return false;
7292
7293 Imm = M[0];
7294
7295 // If this is a VEXT shuffle, the immediate value is the index of the first
7296 // element. The other shuffle indices must be the successive elements after
7297 // the first one.
7298 unsigned ExpectedElt = Imm;
7299 for (unsigned i = 1; i < NumElts; ++i) {
7300 // Increment the expected index. If it wraps around, just follow it
7301 // back to index zero and keep going.
7302 ++ExpectedElt;
7303 if (ExpectedElt == NumElts)
7304 ExpectedElt = 0;
7305
7306 if (M[i] < 0) continue; // ignore UNDEF indices
7307 if (ExpectedElt != static_cast<unsigned>(M[i]))
7308 return false;
7309 }
7310
7311 return true;
7312}
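// Example: for a v8i8 single-source shuffle the mask <3,4,5,6,7,0,1,2>
// starts at element 3 and wraps around, so it is accepted with Imm = 3 and
// can be selected as a VEXT of the vector with itself (vext.8 #3).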
7313
7314static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7315 bool &ReverseVEXT, unsigned &Imm) {
7316 unsigned NumElts = VT.getVectorNumElements();
7317 ReverseVEXT = false;
7318
7319 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7320 if (M[0] < 0)
7321 return false;
7322
7323 Imm = M[0];
7324
7325 // If this is a VEXT shuffle, the immediate value is the index of the first
7326 // element. The other shuffle indices must be the successive elements after
7327 // the first one.
7328 unsigned ExpectedElt = Imm;
7329 for (unsigned i = 1; i < NumElts; ++i) {
7330 // Increment the expected index. If it wraps around, it may still be
7331 // a VEXT but the source vectors must be swapped.
7332 ExpectedElt += 1;
7333 if (ExpectedElt == NumElts * 2) {
7334 ExpectedElt = 0;
7335 ReverseVEXT = true;
7336 }
7337
7338 if (M[i] < 0) continue; // ignore UNDEF indices
7339 if (ExpectedElt != static_cast<unsigned>(M[i]))
7340 return false;
7341 }
7342
7343 // Adjust the index value if the source operands will be swapped.
7344 if (ReverseVEXT)
7345 Imm -= NumElts;
7346
7347 return true;
7348}
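// Examples for two v8i8 sources: the mask <6,7,8,9,10,11,12,13> is accepted
// with Imm = 6 and ReverseVEXT = false, while <14,15,0,1,2,3,4,5> wraps past
// the end of the concatenation, so ReverseVEXT is set, the sources are
// swapped and the immediate is adjusted to 14 - 8 = 6.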
7349
7350static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7351 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7352 // range, then 0 is placed into the resulting vector. So pretty much any mask
7353 // of 8 elements can work here.
7354 return VT == MVT::v8i8 && M.size() == 8;
7355}
7356
7357static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7358 unsigned Index) {
7359 if (Mask.size() == Elements * 2)
7360 return Index / Elements;
7361 return Mask[Index] == 0 ? 0 : 1;
7362}
7363
7364// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7365// checking that pairs of elements in the shuffle mask represent the same index
7366// in each vector, incrementing the expected index by 2 at each step.
7367// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7368// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7369// v2={e,f,g,h}
7370// WhichResult gives the offset for each element in the mask based on which
7371// of the two results it belongs to.
7372//
7373// The transpose can be represented either as:
7374// result1 = shufflevector v1, v2, result1_shuffle_mask
7375// result2 = shufflevector v1, v2, result2_shuffle_mask
7376// where v1/v2 and the shuffle masks have the same number of elements
7377// (here WhichResult (see below) indicates which result is being checked)
7378//
7379// or as:
7380// results = shufflevector v1, v2, shuffle_mask
7381// where both results are returned in one vector and the shuffle mask has twice
7382// as many elements as v1/v2 (here WhichResult will always be 0 if true); in
7383// that case we check the low half and the high half of the shuffle mask as
7384// if each were a mask of the first form.
7385static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7386 unsigned EltSz = VT.getScalarSizeInBits();
7387 if (EltSz == 64)
7388 return false;
7389
7390 unsigned NumElts = VT.getVectorNumElements();
7391 if (M.size() != NumElts && M.size() != NumElts*2)
7392 return false;
7393
7394 // If the mask is twice as long as the input vector then we need to check the
7395 // upper and lower parts of the mask with a matching value for WhichResult
7396 // FIXME: A mask with only even values will be rejected in case the first
7397 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7398 // M[0] is used to determine WhichResult
7399 for (unsigned i = 0; i < M.size(); i += NumElts) {
7400 WhichResult = SelectPairHalf(NumElts, M, i);
7401 for (unsigned j = 0; j < NumElts; j += 2) {
7402 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7403 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7404 return false;
7405 }
7406 }
7407
7408 if (M.size() == NumElts*2)
7409 WhichResult = 0;
7410
7411 return true;
7412}
7413
7414/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7415/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7416/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7417static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7418 unsigned EltSz = VT.getScalarSizeInBits();
7419 if (EltSz == 64)
7420 return false;
7421
7422 unsigned NumElts = VT.getVectorNumElements();
7423 if (M.size() != NumElts && M.size() != NumElts*2)
7424 return false;
7425
7426 for (unsigned i = 0; i < M.size(); i += NumElts) {
7427 WhichResult = SelectPairHalf(NumElts, M, i);
7428 for (unsigned j = 0; j < NumElts; j += 2) {
7429 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7430 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7431 return false;
7432 }
7433 }
7434
7435 if (M.size() == NumElts*2)
7436 WhichResult = 0;
7437
7438 return true;
7439}
7440
7441// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7442// that the mask elements are either all even and in steps of size 2 or all odd
7443// and in steps of size 2.
7444// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7445// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7446// v2={e,f,g,h}
7447// Requires checks similar to those of isVTRNMask with respect to how the
7448// results are returned.
7449static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7450 unsigned EltSz = VT.getScalarSizeInBits();
7451 if (EltSz == 64)
7452 return false;
7453
7454 unsigned NumElts = VT.getVectorNumElements();
7455 if (M.size() != NumElts && M.size() != NumElts*2)
7456 return false;
7457
7458 for (unsigned i = 0; i < M.size(); i += NumElts) {
7459 WhichResult = SelectPairHalf(NumElts, M, i);
7460 for (unsigned j = 0; j < NumElts; ++j) {
7461 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7462 return false;
7463 }
7464 }
7465
7466 if (M.size() == NumElts*2)
7467 WhichResult = 0;
7468
7469 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7470 if (VT.is64BitVector() && EltSz == 32)
7471 return false;
7472
7473 return true;
7474}
7475
7476/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7477/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7478/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7479static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7480 unsigned EltSz = VT.getScalarSizeInBits();
7481 if (EltSz == 64)
7482 return false;
7483
7484 unsigned NumElts = VT.getVectorNumElements();
7485 if (M.size() != NumElts && M.size() != NumElts*2)
7486 return false;
7487
7488 unsigned Half = NumElts / 2;
7489 for (unsigned i = 0; i < M.size(); i += NumElts) {
7490 WhichResult = SelectPairHalf(NumElts, M, i);
7491 for (unsigned j = 0; j < NumElts; j += Half) {
7492 unsigned Idx = WhichResult;
7493 for (unsigned k = 0; k < Half; ++k) {
7494 int MIdx = M[i + j + k];
7495 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7496 return false;
7497 Idx += 2;
7498 }
7499 }
7500 }
7501
7502 if (M.size() == NumElts*2)
7503 WhichResult = 0;
7504
7505 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7506 if (VT.is64BitVector() && EltSz == 32)
7507 return false;
7508
7509 return true;
7510}
7511
7512// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7513// that pairs of elements of the shufflemask represent the same index in each
7514// vector incrementing sequentially through the vectors.
7515// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7516// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7517// v2={e,f,g,h}
7518// Requires checks similar to those of isVTRNMask with respect to how the
7519// results are returned.
7520static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7521 unsigned EltSz = VT.getScalarSizeInBits();
7522 if (EltSz == 64)
7523 return false;
7524
7525 unsigned NumElts = VT.getVectorNumElements();
7526 if (M.size() != NumElts && M.size() != NumElts*2)
7527 return false;
7528
7529 for (unsigned i = 0; i < M.size(); i += NumElts) {
7530 WhichResult = SelectPairHalf(NumElts, M, i);
7531 unsigned Idx = WhichResult * NumElts / 2;
7532 for (unsigned j = 0; j < NumElts; j += 2) {
7533 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7534 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7535 return false;
7536 Idx += 1;
7537 }
7538 }
7539
7540 if (M.size() == NumElts*2)
7541 WhichResult = 0;
7542
7543 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7544 if (VT.is64BitVector() && EltSz == 32)
7545 return false;
7546
7547 return true;
7548}
7549
7550/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7551/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7552/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7553static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7554 unsigned EltSz = VT.getScalarSizeInBits();
7555 if (EltSz == 64)
7556 return false;
7557
7558 unsigned NumElts = VT.getVectorNumElements();
7559 if (M.size() != NumElts && M.size() != NumElts*2)
7560 return false;
7561
7562 for (unsigned i = 0; i < M.size(); i += NumElts) {
7563 WhichResult = SelectPairHalf(NumElts, M, i);
7564 unsigned Idx = WhichResult * NumElts / 2;
7565 for (unsigned j = 0; j < NumElts; j += 2) {
7566 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7567 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7568 return false;
7569 Idx += 1;
7570 }
7571 }
7572
7573 if (M.size() == NumElts*2)
7574 WhichResult = 0;
7575
7576 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7577 if (VT.is64BitVector() && EltSz == 32)
7578 return false;
7579
7580 return true;
7581}
7582
7583/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7584/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7585static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7586 unsigned &WhichResult,
7587 bool &isV_UNDEF) {
7588 isV_UNDEF = false;
7589 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7590 return ARMISD::VTRN;
7591 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7592 return ARMISD::VUZP;
7593 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7594 return ARMISD::VZIP;
7595
7596 isV_UNDEF = true;
7597 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7598 return ARMISD::VTRN;
7599 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7600 return ARMISD::VUZP;
7601 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7602 return ARMISD::VZIP;
7603
7604 return 0;
7605}
7606
7607/// \return true if this is a reverse operation on a vector.
7608static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7609 unsigned NumElts = VT.getVectorNumElements();
7610 // Make sure the mask has the right size.
7611 if (NumElts != M.size())
7612 return false;
7613
7614 // Look for <15, ..., 3, -1, 1, 0>.
7615 for (unsigned i = 0; i != NumElts; ++i)
7616 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7617 return false;
7618
7619 return true;
7620}
7621
7622static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7623 unsigned NumElts = VT.getVectorNumElements();
7624 // Make sure the mask has the right size.
7625 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7626 return false;
7627
7628 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7629 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7630 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7631 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7632 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7633 int Ofs = Top ? 1 : 0;
7634 int Upper = SingleSource ? 0 : NumElts;
7635 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7636 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7637 return false;
7638 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7639 return false;
7640 }
7641 return true;
7642}
7643
7644static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7645 unsigned NumElts = VT.getVectorNumElements();
7646 // Make sure the mask has the right size.
7647 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7648 return false;
7649
7650 // If Top
7651 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7652 // This inserts Input2 into Input1
7653 // else if not Top
7654 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7655 // This inserts Input1 into Input2
7656 unsigned Offset = Top ? 0 : 1;
7657 unsigned N = SingleSource ? 0 : NumElts;
7658 for (unsigned i = 0; i < NumElts; i += 2) {
7659 if (M[i] >= 0 && M[i] != (int)i)
7660 return false;
7661 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7662 return false;
7663 }
7664
7665 return true;
7666}
7667
7668static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7669 unsigned NumElts = ToVT.getVectorNumElements();
7670 if (NumElts != M.size())
7671 return false;
7672
7673 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7674 // looking for patterns of:
7675 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7676 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7677
7678 unsigned Off0 = rev ? NumElts / 2 : 0;
7679 unsigned Off1 = rev ? 0 : NumElts / 2;
7680 for (unsigned i = 0; i < NumElts; i += 2) {
7681 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7682 return false;
7683 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7684 return false;
7685 }
7686
7687 return true;
7688}
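// Example for ToVT == v8i16: the masks <0,4,1,5,2,6,3,7> (!rev) and
// <4,0,5,1,6,2,7,3> (rev) are the interleavings accepted here, i.e. the two
// halves of the input woven into alternating lanes.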
7689
7690// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7691// from a pair of inputs. For example:
7692// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7693// FP_ROUND(EXTRACT_ELT(Y, 0),
7694// FP_ROUND(EXTRACT_ELT(X, 1),
7695// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7696static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7697 const ARMSubtarget *ST) {
7698 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7699 if (!ST->hasMVEFloatOps())
7700 return SDValue();
7701
7702 SDLoc dl(BV);
7703 EVT VT = BV.getValueType();
7704 if (VT != MVT::v8f16)
7705 return SDValue();
7706
7707 // We are looking for a buildvector of fptrunc elements, where all the
7708 // elements are alternately extracted from two sources. Check the first two
7709 // items are valid enough and extract some info from them (they are checked
7710 // properly in the loop below).
7711 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7712 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7713 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7714 return SDValue();
7715 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7716 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7717 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 1)
7718 return SDValue();
7719 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7720 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7721 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7722 return SDValue();
7723
7724 // Check all the values in the BuildVector line up with our expectations.
7725 for (unsigned i = 1; i < 4; i++) {
7726 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7727 return Trunc.getOpcode() == ISD::FP_ROUND &&
7728 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7729 Trunc.getOperand(0).getOperand(0) == Op &&
7730 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7731 };
7732 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7733 return SDValue();
7734 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7735 return SDValue();
7736 }
7737
7738 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7739 DAG.getConstant(0, dl, MVT::i32));
7740 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7741 DAG.getConstant(1, dl, MVT::i32));
7742}
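// Illustrative result of the reconstruction above, for X and Y of type
// v4f32:
//   t1: v8f16 = ARMISD::VCVTN undef, X, 0   ; even lanes <- fptrunc(X)
//   t2: v8f16 = ARMISD::VCVTN t1, Y, 1      ; odd lanes  <- fptrunc(Y)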
7743
7744// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7745// from a single input on alternating lanes. For example:
7746// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7747// FP_ROUND(EXTRACT_ELT(X, 2),
7748// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7749static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7750 const ARMSubtarget *ST) {
7751 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7752 if (!ST->hasMVEFloatOps())
7753 return SDValue();
7754
7755 SDLoc dl(BV);
7756 EVT VT = BV.getValueType();
7757 if (VT != MVT::v4f32)
7758 return SDValue();
7759
7760 // We are looking for a buildvector of fpext elements, where all the
7761 // elements are alternating lanes from a single source. For example <0,2,4,6>
7762 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7763 // info from them (they are checked properly in the loop below).
7764 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7765 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7766 return SDValue();
7767 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7768 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7769 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7770 return SDValue();
7771
7772 // Check all the values in the BuildVector line up with our expectations.
7773 for (unsigned i = 1; i < 4; i++) {
7774 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7775 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7776 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7777 Trunc.getOperand(0).getOperand(0) == Op &&
7778 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7779 };
7780 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7781 return SDValue();
7782 }
7783
7784 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7785 DAG.getConstant(Offset, dl, MVT::i32));
7786}
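// Illustrative result of the reconstruction above: a v4f32 build_vector of
// fpext taken from the even lanes of a v8f16 value X becomes
//   ARMISD::VCVTL X, 0
// with Offset = 1 selecting the odd lanes instead.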
7787
7788// If N is an integer constant that can be moved into a register in one
7789// instruction, return an SDValue of such a constant (will become a MOV
7790// instruction). Otherwise return null.
7791static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7792 const ARMSubtarget *ST, const SDLoc &dl) {
7793 uint64_t Val;
7794 if (!isa<ConstantSDNode>(N))
7795 return SDValue();
7796 Val = N->getAsZExtVal();
7797
7798 if (ST->isThumb1Only()) {
7799 if (Val <= 255 || ~Val <= 255)
7800 return DAG.getConstant(Val, dl, MVT::i32);
7801 } else {
7802 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7803 return DAG.getConstant(Val, dl, MVT::i32);
7804 }
7805 return SDValue();
7806}
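// Examples of what the check above accepts: in Thumb1 a small value such as
// 200 fits a single MOVS; in ARM/Thumb2 any value (or complement) accepted
// by ARM_AM::getSOImmVal, e.g. 0x00FF0000, can be built with one MOV or
// MVN, so splatting it as MOV + VDUP is preferred over a constant-pool load.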
7807
7808static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7809 const ARMSubtarget *ST) {
7810 SDLoc dl(Op);
7811 EVT VT = Op.getValueType();
7812
7813 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7814
7815 unsigned NumElts = VT.getVectorNumElements();
7816 unsigned BoolMask;
7817 unsigned BitsPerBool;
7818 if (NumElts == 2) {
7819 BitsPerBool = 8;
7820 BoolMask = 0xff;
7821 } else if (NumElts == 4) {
7822 BitsPerBool = 4;
7823 BoolMask = 0xf;
7824 } else if (NumElts == 8) {
7825 BitsPerBool = 2;
7826 BoolMask = 0x3;
7827 } else if (NumElts == 16) {
7828 BitsPerBool = 1;
7829 BoolMask = 0x1;
7830 } else
7831 return SDValue();
7832
7833 // If this is a single value copied into all lanes (a splat), we can just sign
7834 // extend that single value
7835 SDValue FirstOp = Op.getOperand(0);
7836 if (!isa<ConstantSDNode>(FirstOp) &&
7837 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7838 return U.get().isUndef() || U.get() == FirstOp;
7839 })) {
7840 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7841 DAG.getValueType(MVT::i1));
7842 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7843 }
7844
7845 // First create base with bits set where known
7846 unsigned Bits32 = 0;
7847 for (unsigned i = 0; i < NumElts; ++i) {
7848 SDValue V = Op.getOperand(i);
7849 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7850 continue;
7851 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7852 if (BitSet)
7853 Bits32 |= BoolMask << (i * BitsPerBool);
7854 }
7855
7856 // Add in unknown nodes
7857 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7858 DAG.getConstant(Bits32, dl, MVT::i32));
7859 for (unsigned i = 0; i < NumElts; ++i) {
7860 SDValue V = Op.getOperand(i);
7861 if (isa<ConstantSDNode>(V) || V.isUndef())
7862 continue;
7863 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7864 DAG.getConstant(i, dl, MVT::i32));
7865 }
7866
7867 return Base;
7868}
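// Worked example for the predicate build_vector above: a v4i1 constant
// <1,0,1,1> uses BitsPerBool == 4 and BoolMask == 0xf, so the known lanes
// pack into Bits32 = 0xFF0F (lane i occupies nibble i), which is then
// converted into the v4i1 predicate value; non-constant lanes would be
// patched in afterwards with INSERT_VECTOR_ELT.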
7869
7870static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7871 const ARMSubtarget *ST) {
7872 if (!ST->hasMVEIntegerOps())
7873 return SDValue();
7874
7875 // We are looking for a buildvector where each element is Op[0] + i*N
7876 EVT VT = Op.getValueType();
7877 SDValue Op0 = Op.getOperand(0);
7878 unsigned NumElts = VT.getVectorNumElements();
7879
7880 // Get the increment value from operand 1
7881 SDValue Op1 = Op.getOperand(1);
7882 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7883 !isa<ConstantSDNode>(Op1.getOperand(1)))
7884 return SDValue();
7885 unsigned N = Op1.getConstantOperandVal(1);
7886 if (N != 1 && N != 2 && N != 4 && N != 8)
7887 return SDValue();
7888
7889 // Check that each other operand matches
7890 for (unsigned I = 2; I < NumElts; I++) {
7891 SDValue OpI = Op.getOperand(I);
7892 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7893 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7894 OpI.getConstantOperandVal(1) != I * N)
7895 return SDValue();
7896 }
7897
7898 SDLoc DL(Op);
7899 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7900 DAG.getConstant(N, DL, MVT::i32));
7901}
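// Example for the pattern above: the v4i32 build_vector
//   <x, x+2, x+4, x+6>
// is matched with N == 2 and becomes ARMISD::VIDUP(x, 2), corresponding to
// an MVE VIDUP.32 with base register x and immediate step 2.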
7902
7903// Returns true if the operation N can be treated as qr instruction variant at
7904// operand Op.
7905static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7906 switch (N->getOpcode()) {
7907 case ISD::ADD:
7908 case ISD::MUL:
7909 case ISD::SADDSAT:
7910 case ISD::UADDSAT:
7911 case ISD::AVGFLOORS:
7912 case ISD::AVGFLOORU:
7913 return true;
7914 case ISD::SUB:
7915 case ISD::SSUBSAT:
7916 case ISD::USUBSAT:
7917 return N->getOperand(1).getNode() == Op;
7918 case ISD::INTRINSIC_WO_CHAIN:
7919 switch (N->getConstantOperandVal(0)) {
7920 case Intrinsic::arm_mve_add_predicated:
7921 case Intrinsic::arm_mve_mul_predicated:
7922 case Intrinsic::arm_mve_qadd_predicated:
7923 case Intrinsic::arm_mve_vhadd:
7924 case Intrinsic::arm_mve_hadd_predicated:
7925 case Intrinsic::arm_mve_vqdmulh:
7926 case Intrinsic::arm_mve_qdmulh_predicated:
7927 case Intrinsic::arm_mve_vqrdmulh:
7928 case Intrinsic::arm_mve_qrdmulh_predicated:
7929 case Intrinsic::arm_mve_vqdmull:
7930 case Intrinsic::arm_mve_vqdmull_predicated:
7931 return true;
7932 case Intrinsic::arm_mve_sub_predicated:
7933 case Intrinsic::arm_mve_qsub_predicated:
7934 case Intrinsic::arm_mve_vhsub:
7935 case Intrinsic::arm_mve_hsub_predicated:
7936 return N->getOperand(2).getNode() == Op;
7937 default:
7938 return false;
7939 }
7940 default:
7941 return false;
7942 }
7943}
7944
7945// If this is a case we can't handle, return null and let the default
7946// expansion code take care of it.
7947SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7948 const ARMSubtarget *ST) const {
7949 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7950 SDLoc dl(Op);
7951 EVT VT = Op.getValueType();
7952
7953 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7954 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7955
7956 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7957 return R;
7958
7959 APInt SplatBits, SplatUndef;
7960 unsigned SplatBitSize;
7961 bool HasAnyUndefs;
7962 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7963 if (SplatUndef.isAllOnes())
7964 return DAG.getUNDEF(VT);
7965
7966 // If all the users of this constant splat are qr instruction variants,
7967 // generate a vdup of the constant.
7968 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7969 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7970 all_of(BVN->users(),
7971 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7972 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7973 : SplatBitSize == 16 ? MVT::v8i16
7974 : MVT::v16i8;
7975 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7976 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7977 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7978 }
7979
7980 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7981 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7982 // Check if an immediate VMOV works.
7983 EVT VmovVT;
7984 SDValue Val =
7985 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7986 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7987
7988 if (Val.getNode()) {
7989 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7990 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7991 }
7992
7993 // Try an immediate VMVN.
7994 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7995 Val = isVMOVModifiedImm(
7996 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7997 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7998 if (Val.getNode()) {
7999 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
8000 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
8001 }
8002
8003 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
8004 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
8005 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
8006 if (ImmVal != -1) {
8007 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
8008 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
8009 }
8010 }
8011
8012 // If we are under MVE, generate a VDUP(constant), bitcast to the original
8013 // type.
8014 if (ST->hasMVEIntegerOps() &&
8015 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
8016 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
8017 : SplatBitSize == 16 ? MVT::v8i16
8018 : MVT::v16i8;
8019 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
8020 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
8021 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
8022 }
8023 }
8024 }
8025
8026 // Scan through the operands to see if only one value is used.
8027 //
8028 // As an optimisation, even if more than one value is used it may be more
8029 // profitable to splat with one value and then change some lanes.
8030 //
8031 // Heuristically we decide to do this if the vector has a "dominant" value,
8032 // defined as splatted to more than half of the lanes.
8033 unsigned NumElts = VT.getVectorNumElements();
8034 bool isOnlyLowElement = true;
8035 bool usesOnlyOneValue = true;
8036 bool hasDominantValue = false;
8037 bool isConstant = true;
8038
8039 // Map of the number of times a particular SDValue appears in the
8040 // element list.
8041 DenseMap<SDValue, unsigned> ValueCounts;
8042 SDValue Value;
8043 for (unsigned i = 0; i < NumElts; ++i) {
8044 SDValue V = Op.getOperand(i);
8045 if (V.isUndef())
8046 continue;
8047 if (i > 0)
8048 isOnlyLowElement = false;
8049 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
8050 isConstant = false;
8051
8052 unsigned &Count = ValueCounts[V];
8053
8054 // Is this value dominant? (takes up more than half of the lanes)
8055 if (++Count > (NumElts / 2)) {
8056 hasDominantValue = true;
8057 Value = V;
8058 }
8059 }
8060 if (ValueCounts.size() != 1)
8061 usesOnlyOneValue = false;
8062 if (!Value.getNode() && !ValueCounts.empty())
8063 Value = ValueCounts.begin()->first;
8064
8065 if (ValueCounts.empty())
8066 return DAG.getUNDEF(VT);
8067
8068 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8069 // Keep going if we are hitting this case.
8070 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8071 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8072
8073 unsigned EltSize = VT.getScalarSizeInBits();
8074
8075 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8076 // i32 and try again.
8077 if (hasDominantValue && EltSize <= 32) {
8078 if (!isConstant) {
8079 SDValue N;
8080
8081 // If we are VDUPing a value that comes directly from a vector, that will
8082 // cause an unnecessary move to and from a GPR, where instead we could
8083 // just use VDUPLANE. We can only do this if the lane being extracted
8084 // is at a constant index, as the VDUP from lane instructions only have
8085 // constant-index forms.
8086 ConstantSDNode *constIndex;
8087 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8088 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8089 // We need to create a new undef vector to use for the VDUPLANE if the
8090 // size of the vector from which we get the value is different than the
8091 // size of the vector that we need to create. We will insert the element
8092 // such that the register coalescer will remove unnecessary copies.
8093 if (VT != Value->getOperand(0).getValueType()) {
8094 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8095 VT.getVectorNumElements();
8096 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8097 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8098 Value, DAG.getConstant(index, dl, MVT::i32)),
8099 DAG.getConstant(index, dl, MVT::i32));
8100 } else
8101 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8102 Value->getOperand(0), Value->getOperand(1));
8103 } else
8104 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8105
8106 if (!usesOnlyOneValue) {
8107 // The dominant value was splatted as 'N', but we now have to insert
8108 // all differing elements.
8109 for (unsigned I = 0; I < NumElts; ++I) {
8110 if (Op.getOperand(I) == Value)
8111 continue;
8112 SmallVector<SDValue, 3> Ops;
8113 Ops.push_back(N);
8114 Ops.push_back(Op.getOperand(I));
8115 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8116 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8117 }
8118 }
8119 return N;
8120 }
8121 if (VT.getVectorElementType().isFloatingPoint()) {
8122 SmallVector<SDValue, 8> Ops;
8123 MVT FVT = VT.getVectorElementType().getSimpleVT();
8124 assert(FVT == MVT::f32 || FVT == MVT::f16);
8125 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8126 for (unsigned i = 0; i < NumElts; ++i)
8127 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8128 Op.getOperand(i)));
8129 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8130 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8131 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8132 if (Val.getNode())
8133 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8134 }
8135 if (usesOnlyOneValue) {
8136 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8137 if (isConstant && Val.getNode())
8138 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8139 }
8140 }
8141
8142 // If all elements are constants and the case above didn't get hit, fall back
8143 // to the default expansion, which will generate a load from the constant
8144 // pool.
8145 if (isConstant)
8146 return SDValue();
8147
8148 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8149 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8150 // length <= 2.
8151 if (NumElts >= 4)
8152 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8153 return shuffle;
8154
8155 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8156 // VCVT's
8157 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8158 return VCVT;
8159 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8160 return VCVT;
8161
8162 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8163 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8164 // into two 64-bit vectors; we might discover a better way to lower it.
8165 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8166 EVT ExtVT = VT.getVectorElementType();
8167 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8168 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8169 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8170 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8171 SDValue Upper =
8172 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8173 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8174 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8175 if (Lower && Upper)
8176 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8177 }
8178
8179 // Vectors with 32- or 64-bit elements can be built by directly assigning
8180 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8181 // will be legalized.
8182 if (EltSize >= 32) {
8183 // Do the expansion with floating-point types, since that is what the VFP
8184 // registers are defined to use, and since i64 is not legal.
8185 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8186 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8187 SmallVector<SDValue, 8> Ops;
8188 for (unsigned i = 0; i < NumElts; ++i)
8189 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8190 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8191 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8192 }
8193
8194 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8195 // know the default expansion would otherwise fall back on something even
8196 // worse. For a vector with one or two non-undef values, that's
8197 // scalar_to_vector for the elements followed by a shuffle (provided the
8198 // shuffle is valid for the target) and materialization element by element
8199 // on the stack followed by a load for everything else.
8200 if (!isConstant && !usesOnlyOneValue) {
8201 SDValue Vec = DAG.getUNDEF(VT);
8202 for (unsigned i = 0 ; i < NumElts; ++i) {
8203 SDValue V = Op.getOperand(i);
8204 if (V.isUndef())
8205 continue;
8206 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8207 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8208 }
8209 return Vec;
8210 }
8211
8212 return SDValue();
8213}
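// Illustrative example of the "dominant value" path above: the v4i32
// build_vector <a, a, b, a>, with a and b non-constant, is lowered as
//   t1: v4i32 = ARMISD::VDUP a
//   t2: v4i32 = insert_vector_elt t1, b, 2
// instead of inserting all four lanes one by one.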
8214
8215// Gather data to see if the operation can be modelled as a
8216// shuffle in combination with VEXTs.
8217SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8218 SelectionDAG &DAG) const {
8219 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8220 SDLoc dl(Op);
8221 EVT VT = Op.getValueType();
8222 unsigned NumElts = VT.getVectorNumElements();
8223
8224 struct ShuffleSourceInfo {
8225 SDValue Vec;
8226 unsigned MinElt = std::numeric_limits<unsigned>::max();
8227 unsigned MaxElt = 0;
8228
8229 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8230 // be compatible with the shuffle we intend to construct. As a result
8231 // ShuffleVec will be some sliding window into the original Vec.
8232 SDValue ShuffleVec;
8233
8234 // Code should guarantee that element i in Vec starts at element
8235 // "WindowBase + i * WindowScale" in ShuffleVec.
8236 int WindowBase = 0;
8237 int WindowScale = 1;
8238
8239 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8240
8241 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8242 };
8243
8244 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8245 // node.
8246 SmallVector<ShuffleSourceInfo, 2> Sources;
8247 for (unsigned i = 0; i < NumElts; ++i) {
8248 SDValue V = Op.getOperand(i);
8249 if (V.isUndef())
8250 continue;
8251 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8252 // A shuffle can only come from building a vector from various
8253 // elements of other vectors.
8254 return SDValue();
8255 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8256 // Furthermore, shuffles require a constant mask, whereas extractelts
8257 // accept variable indices.
8258 return SDValue();
8259 }
8260
8261 // Add this element source to the list if it's not already there.
8262 SDValue SourceVec = V.getOperand(0);
8263 auto Source = llvm::find(Sources, SourceVec);
8264 if (Source == Sources.end())
8265 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8266
8267 // Update the minimum and maximum lane number seen.
8268 unsigned EltNo = V.getConstantOperandVal(1);
8269 Source->MinElt = std::min(Source->MinElt, EltNo);
8270 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8271 }
8272
8273 // Currently only do something sane when at most two source vectors
8274 // are involved.
8275 if (Sources.size() > 2)
8276 return SDValue();
8277
8278 // Find out the smallest element size among result and two sources, and use
8279 // it as element size to build the shuffle_vector.
8280 EVT SmallestEltTy = VT.getVectorElementType();
8281 for (auto &Source : Sources) {
8282 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8283 if (SrcEltTy.bitsLT(SmallestEltTy))
8284 SmallestEltTy = SrcEltTy;
8285 }
8286 unsigned ResMultiplier =
8287 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8288 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8289 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8290
8291 // If the source vector is too wide or too narrow, we may nevertheless be able
8292 // to construct a compatible shuffle either by concatenating it with UNDEF or
8293 // extracting a suitable range of elements.
8294 for (auto &Src : Sources) {
8295 EVT SrcVT = Src.ShuffleVec.getValueType();
8296
8297 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8298 uint64_t VTSize = VT.getFixedSizeInBits();
8299 if (SrcVTSize == VTSize)
8300 continue;
8301
8302 // This stage of the search produces a source with the same element type as
8303 // the original, but with a total width matching the BUILD_VECTOR output.
8304 EVT EltVT = SrcVT.getVectorElementType();
8305 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8306 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8307
8308 if (SrcVTSize < VTSize) {
8309 if (2 * SrcVTSize != VTSize)
8310 return SDValue();
8311 // We can pad out the smaller vector for free, so if it's part of a
8312 // shuffle...
8313 Src.ShuffleVec =
8314 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8315 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8316 continue;
8317 }
8318
8319 if (SrcVTSize != 2 * VTSize)
8320 return SDValue();
8321
8322 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8323 // Span too large for a VEXT to cope
8324 return SDValue();
8325 }
8326
8327 if (Src.MinElt >= NumSrcElts) {
8328 // The extraction can just take the second half
8329 Src.ShuffleVec =
8330 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8331 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8332 Src.WindowBase = -NumSrcElts;
8333 } else if (Src.MaxElt < NumSrcElts) {
8334 // The extraction can just take the first half
8335 Src.ShuffleVec =
8336 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8337 DAG.getConstant(0, dl, MVT::i32));
8338 } else {
8339 // An actual VEXT is needed
8340 SDValue VEXTSrc1 =
8341 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8342 DAG.getConstant(0, dl, MVT::i32));
8343 SDValue VEXTSrc2 =
8344 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8345 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8346
8347 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8348 VEXTSrc2,
8349 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8350 Src.WindowBase = -Src.MinElt;
8351 }
8352 }
8353
8354 // Another possible incompatibility occurs from the vector element types. We
8355 // can fix this by bitcasting the source vectors to the same type we intend
8356 // for the shuffle.
8357 for (auto &Src : Sources) {
8358 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8359 if (SrcEltTy == SmallestEltTy)
8360 continue;
8361 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8362 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8363 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8364 Src.WindowBase *= Src.WindowScale;
8365 }
8366
8367 // Final check before we try to actually produce a shuffle.
8368 LLVM_DEBUG({
8369 for (auto Src : Sources)
8370 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8371 });
8372
8373 // The stars all align, our next step is to produce the mask for the shuffle.
8374 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8375 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8376 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8377 SDValue Entry = Op.getOperand(i);
8378 if (Entry.isUndef())
8379 continue;
8380
8381 auto Src = llvm::find(Sources, Entry.getOperand(0));
8382 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8383
8384 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8385 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8386 // segment.
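// For example, extracting an i32 element into a BUILD_VECTOR of i16s defines
// std::min(32, 16) = 16 bits; with an i8 shuffle lane type that corresponds
// to LanesDefined = 2 lanes of the mask.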
8387 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8388 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8389 VT.getScalarSizeInBits());
8390 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8391
8392 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8393 // starting at the appropriate offset.
8394 int *LaneMask = &Mask[i * ResMultiplier];
8395
8396 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8397 ExtractBase += NumElts * (Src - Sources.begin());
8398 for (int j = 0; j < LanesDefined; ++j)
8399 LaneMask[j] = ExtractBase + j;
8400 }
8401
8402
8403 // We can't handle more than two sources. This should have already
8404 // been checked before this point.
8405 assert(Sources.size() <= 2 && "Too many sources!");
8406
8407 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8408 for (unsigned i = 0; i < Sources.size(); ++i)
8409 ShuffleOps[i] = Sources[i].ShuffleVec;
8410
8411 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8412 ShuffleOps[1], Mask, DAG);
8413 if (!Shuffle)
8414 return SDValue();
8415 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8416}
8417
8418 enum ShuffleOpCodes {
8419 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8420 OP_VREV,
8421 OP_VDUP0,
8422 OP_VDUP1,
8423 OP_VDUP2,
8424 OP_VDUP3,
8425 OP_VEXT1,
8426 OP_VEXT2,
8427 OP_VEXT3,
8428 OP_VUZPL, // VUZP, left result
8429 OP_VUZPR, // VUZP, right result
8430 OP_VZIPL, // VZIP, left result
8431 OP_VZIPR, // VZIP, right result
8432 OP_VTRNL, // VTRN, left result
8433 OP_VTRNR // VTRN, right result
8434 };
8435
8436static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8437 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8438 switch (OpNum) {
8439 case OP_COPY:
8440 case OP_VREV:
8441 case OP_VDUP0:
8442 case OP_VDUP1:
8443 case OP_VDUP2:
8444 case OP_VDUP3:
8445 return true;
8446 }
8447 return false;
8448}
8449
8450/// isShuffleMaskLegal - Targets can use this to indicate that they only
8451/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8452/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8453/// are assumed to be legal.
8454 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8455 if (VT.getVectorNumElements() == 4 &&
8456 (VT.is128BitVector() || VT.is64BitVector())) {
8457 unsigned PFIndexes[4];
8458 for (unsigned i = 0; i != 4; ++i) {
8459 if (M[i] < 0)
8460 PFIndexes[i] = 8;
8461 else
8462 PFIndexes[i] = M[i];
8463 }
8464
8465 // Compute the index in the perfect shuffle table.
8466 unsigned PFTableIndex =
8467 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
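// Each index is a base-9 digit (0-7 for lanes, 8 for an undef lane); e.g. the
// mask <0,1,2,3> maps to entry 0*729 + 1*81 + 2*9 + 3 = 102 of the table.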
8468 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8469 unsigned Cost = (PFEntry >> 30);
8470
8471 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8472 return true;
8473 }
8474
8475 bool ReverseVEXT, isV_UNDEF;
8476 unsigned Imm, WhichResult;
8477
8478 unsigned EltSize = VT.getScalarSizeInBits();
8479 if (EltSize >= 32 ||
8481 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8482 isVREVMask(M, VT, 64) ||
8483 isVREVMask(M, VT, 32) ||
8484 isVREVMask(M, VT, 16))
8485 return true;
8486 else if (Subtarget->hasNEON() &&
8487 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8488 isVTBLMask(M, VT) ||
8489 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8490 return true;
8491 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8492 isReverseMask(M, VT))
8493 return true;
8494 else if (Subtarget->hasMVEIntegerOps() &&
8495 (isVMOVNMask(M, VT, true, false) ||
8496 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8497 return true;
8498 else if (Subtarget->hasMVEIntegerOps() &&
8499 (isTruncMask(M, VT, false, false) ||
8500 isTruncMask(M, VT, false, true) ||
8501 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8502 return true;
8503 else
8504 return false;
8505}
8506
8507/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8508/// the specified operations to build the shuffle.
8509static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8510 SDValue RHS, SelectionDAG &DAG,
8511 const SDLoc &dl) {
8512 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8513 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8514 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8515
8516 if (OpNum == OP_COPY) {
8517 if (LHSID == (1*9+2)*9+3) return LHS;
8518 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8519 return RHS;
8520 }
8521
8522 SDValue OpLHS, OpRHS;
8523 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8524 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8525 EVT VT = OpLHS.getValueType();
8526
8527 switch (OpNum) {
8528 default: llvm_unreachable("Unknown shuffle opcode!");
8529 case OP_VREV:
8530 // VREV divides the vector in half and swaps within the half.
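// e.g. VREV64 on a <4 x i32> <a,b,c,d> swaps within each 64-bit half,
// giving <b,a,d,c>.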
8531 if (VT.getScalarSizeInBits() == 32)
8532 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8533 // vrev <4 x i16> -> VREV32
8534 if (VT.getScalarSizeInBits() == 16)
8535 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8536 // vrev <4 x i8> -> VREV16
8537 assert(VT.getScalarSizeInBits() == 8);
8538 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8539 case OP_VDUP0:
8540 case OP_VDUP1:
8541 case OP_VDUP2:
8542 case OP_VDUP3:
8543 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8544 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8545 case OP_VEXT1:
8546 case OP_VEXT2:
8547 case OP_VEXT3:
8548 return DAG.getNode(ARMISD::VEXT, dl, VT,
8549 OpLHS, OpRHS,
8550 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8551 case OP_VUZPL:
8552 case OP_VUZPR:
8553 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8554 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8555 case OP_VZIPL:
8556 case OP_VZIPR:
8557 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8558 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8559 case OP_VTRNL:
8560 case OP_VTRNR:
8561 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8562 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8563 }
8564}
8565
8566 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8567 ArrayRef<int> ShuffleMask,
8568 SelectionDAG &DAG) {
8569 // Check to see if we can use the VTBL instruction.
8570 SDValue V1 = Op.getOperand(0);
8571 SDValue V2 = Op.getOperand(1);
8572 SDLoc DL(Op);
8573
8574 SmallVector<SDValue, 8> VTBLMask;
8575 for (int I : ShuffleMask)
8576 VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));
8577
8578 if (V2.getNode()->isUndef())
8579 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8580 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8581
8582 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8583 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8584}
8585
8586 static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8587 SDLoc DL(Op);
8588 EVT VT = Op.getValueType();
8589
8590 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8591 "Expect an v8i16/v16i8 type");
8592 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8593 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8594 // extract the first 8 bytes into the top double word and the last 8 bytes
8595 // into the bottom double word, through a new vector shuffle that will be
8596 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
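// For example, for v16i8 the VREV64 gives <7,...,0,15,...,8>; the mask
// <8,...,15,0,...,7> built below then swaps the two halves, yielding the
// fully reversed <15,...,0>.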
8597 std::vector<int> NewMask;
8598 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8599 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8600 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8601 NewMask.push_back(i);
8602 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8603}
8604
8605 static EVT getVectorTyFromPredicateVector(EVT VT) {
8606 switch (VT.getSimpleVT().SimpleTy) {
8607 case MVT::v2i1:
8608 return MVT::v2f64;
8609 case MVT::v4i1:
8610 return MVT::v4i32;
8611 case MVT::v8i1:
8612 return MVT::v8i16;
8613 case MVT::v16i1:
8614 return MVT::v16i8;
8615 default:
8616 llvm_unreachable("Unexpected vector predicate type");
8617 }
8618}
8619
8620 static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8621 SelectionDAG &DAG) {
8622 // Converting from boolean predicates to integers involves creating a vector
8623 // of all ones or all zeroes and selecting the lanes based upon the real
8624 // predicate.
8625 SDValue AllOnes =
8626 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8627 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8628
8629 SDValue AllZeroes =
8630 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8631 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8632
8633 // Get full vector type from predicate type
8634 EVT NewVT = getVectorTyFromPredicateVector(VT);
8635
8636 SDValue RecastV1;
8637 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8638 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8639 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8640 // since we know in hardware the sizes are really the same.
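// (In hardware all MVE predicates live in the same 16 predicate bits; a v4i1
// roughly uses 4 bits per lane and a v8i1 uses 2, so the cast is a
// reinterpretation rather than a resize.)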
8641 if (VT != MVT::v16i1)
8642 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8643 else
8644 RecastV1 = Pred;
8645
8646 // Select either all ones or zeroes depending upon the real predicate bits.
8647 SDValue PredAsVector =
8648 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8649
8650 // Recast our new predicate-as-integer v16i8 vector into something
8651 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8652 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8653}
8654
8655 static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8656 const ARMSubtarget *ST) {
8657 EVT VT = Op.getValueType();
8658 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8659 ArrayRef<int> ShuffleMask = SVN->getMask();
8660
8661 assert(ST->hasMVEIntegerOps() &&
8662 "No support for vector shuffle of boolean predicates");
8663
8664 SDValue V1 = Op.getOperand(0);
8665 SDValue V2 = Op.getOperand(1);
8666 SDLoc dl(Op);
8667 if (isReverseMask(ShuffleMask, VT)) {
8668 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8669 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8670 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8671 DAG.getConstant(16, dl, MVT::i32));
8672 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8673 }
8674
8675 // Until we can come up with optimised cases for every single vector
8676 // shuffle in existence we have chosen the least painful strategy. This is
8677 // to essentially promote the boolean predicate to an 8-bit integer, where
8678 // each predicate represents a byte. Then we fall back on a normal integer
8679 // vector shuffle and convert the result back into a predicate vector. In
8680 // many cases the generated code might be even better than scalar code
8681 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8682 // fields in a register into 8 other arbitrary 2-bit fields!
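// For example, a v8i1 shuffle is performed below as a v8i16 shuffle of
// all-ones/all-zeroes lanes, and the trailing VCMPZ(..., NE) converts the
// shuffled integer vector back into a predicate.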
8683 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8684 EVT NewVT = PredAsVector1.getValueType();
8685 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8686 : PromoteMVEPredVector(dl, V2, VT, DAG);
8687 assert(PredAsVector2.getValueType() == NewVT &&
8688 "Expected identical vector type in expanded i1 shuffle!");
8689
8690 // Do the shuffle!
8691 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8692 PredAsVector2, ShuffleMask);
8693
8694 // Now return the result of comparing the shuffled vector with zero,
8695 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8696 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8697 if (VT == MVT::v2i1) {
8698 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8699 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8700 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8701 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8702 }
8703 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8704 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8705}
8706
8707 static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8708 ArrayRef<int> ShuffleMask,
8709 SelectionDAG &DAG) {
8710 // Attempt to lower the vector shuffle using as many whole register movs as
8711 // possible. This is useful for types smaller than 32 bits, which would
8712 // often otherwise become a series of GPR moves.
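// For example, the v16i8 mask <4,5,6,7, 0,1,2,3, 16,17,18,19, 12,13,14,15>
// decomposes into four whole 32-bit lane moves: lane 1, lane 0 and lane 3 of
// the first input plus lane 0 of the second, each done as an f32 extract below.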
8713 SDLoc dl(Op);
8714 EVT VT = Op.getValueType();
8715 if (VT.getScalarSizeInBits() >= 32)
8716 return SDValue();
8717
8718 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8719 "Unexpected vector type");
8720 int NumElts = VT.getVectorNumElements();
8721 int QuarterSize = NumElts / 4;
8722 // The four final parts of the vector, as i32's
8723 SDValue Parts[4];
8724
8725 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8726 // <u,u,u,u>), returning the vmov lane index
8727 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8728 // Detect which mov lane this would be from the first non-undef element.
8729 int MovIdx = -1;
8730 for (int i = 0; i < Length; i++) {
8731 if (ShuffleMask[Start + i] >= 0) {
8732 if (ShuffleMask[Start + i] % Length != i)
8733 return -1;
8734 MovIdx = ShuffleMask[Start + i] / Length;
8735 break;
8736 }
8737 }
8738 // If all items are undef, leave this for other combines
8739 if (MovIdx == -1)
8740 return -1;
8741 // Check the remaining values are the correct part of the same mov
8742 for (int i = 1; i < Length; i++) {
8743 if (ShuffleMask[Start + i] >= 0 &&
8744 (ShuffleMask[Start + i] / Length != MovIdx ||
8745 ShuffleMask[Start + i] % Length != i))
8746 return -1;
8747 }
8748 return MovIdx;
8749 };
8750
8751 for (int Part = 0; Part < 4; ++Part) {
8752 // Does this part look like a mov
8753 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8754 if (Elt != -1) {
8755 SDValue Input = Op->getOperand(0);
8756 if (Elt >= 4) {
8757 Input = Op->getOperand(1);
8758 Elt -= 4;
8759 }
8760 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8761 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8762 DAG.getConstant(Elt, dl, MVT::i32));
8763 }
8764 }
8765
8766 // Nothing interesting found, just return
8767 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8768 return SDValue();
8769
8770 // The other parts need to be built with the old shuffle vector, cast to a
8771 // v4i32 and extract_vector_elts
8772 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8773 SmallVector<int, 16> NewShuffleMask;
8774 for (int Part = 0; Part < 4; ++Part)
8775 for (int i = 0; i < QuarterSize; i++)
8776 NewShuffleMask.push_back(
8777 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8778 SDValue NewShuffle = DAG.getVectorShuffle(
8779 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8780 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8781
8782 for (int Part = 0; Part < 4; ++Part)
8783 if (!Parts[Part])
8784 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8785 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8786 }
8787 // Build a vector out of the various parts and bitcast it back to the original
8788 // type.
8789 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8790 return DAG.getBitcast(VT, NewVec);
8791}
8792
8793 static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8794 ArrayRef<int> ShuffleMask,
8795 SelectionDAG &DAG) {
8796 SDValue V1 = Op.getOperand(0);
8797 SDValue V2 = Op.getOperand(1);
8798 EVT VT = Op.getValueType();
8799 unsigned NumElts = VT.getVectorNumElements();
8800
8801 // A one-off identity mask is one that is mostly an identity mask from a
8802 // single source but contains a single element out-of-place, either from a
8803 // different vector or from another position in the same vector. Instead of
8804 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8805 // pair directly.
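// For example, with 4-element inputs the mask <0,1,6,3> is the identity on V1
// except for lane 2, which comes from lane 2 of V2; we extract V2[2] and
// insert it into V1 at lane 2.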
8806 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8807 int &OffElement) {
8808 OffElement = -1;
8809 int NonUndef = 0;
8810 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8811 if (Mask[i] == -1)
8812 continue;
8813 NonUndef++;
8814 if (Mask[i] != i + BaseOffset) {
8815 if (OffElement == -1)
8816 OffElement = i;
8817 else
8818 return false;
8819 }
8820 }
8821 return NonUndef > 2 && OffElement != -1;
8822 };
8823 int OffElement;
8824 SDValue VInput;
8825 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8826 VInput = V1;
8827 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8828 VInput = V2;
8829 else
8830 return SDValue();
8831
8832 SDLoc dl(Op);
8833 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8834 ? MVT::i32
8835 : VT.getScalarType();
8836 SDValue Elt = DAG.getNode(
8837 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8838 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8839 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8840 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8841 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8842}
8843
8844 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8845 const ARMSubtarget *ST) {
8846 SDValue V1 = Op.getOperand(0);
8847 SDValue V2 = Op.getOperand(1);
8848 SDLoc dl(Op);
8849 EVT VT = Op.getValueType();
8850 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8851 unsigned EltSize = VT.getScalarSizeInBits();
8852
8853 if (ST->hasMVEIntegerOps() && EltSize == 1)
8854 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8855
8856 // Convert shuffles that are directly supported on NEON to target-specific
8857 // DAG nodes, instead of keeping them as shuffles and matching them again
8858 // during code selection. This is more efficient and avoids the possibility
8859 // of inconsistencies between legalization and selection.
8860 // FIXME: floating-point vectors should be canonicalized to integer vectors
8861 // of the same type so that they get CSEd properly.
8862 ArrayRef<int> ShuffleMask = SVN->getMask();
8863
8864 if (EltSize <= 32) {
8865 if (SVN->isSplat()) {
8866 int Lane = SVN->getSplatIndex();
8867 // If this is undef splat, generate it via "just" vdup, if possible.
8868 if (Lane == -1) Lane = 0;
8869
8870 // Test if V1 is a SCALAR_TO_VECTOR.
8871 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8872 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8873 }
8874 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8875 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8876 // reaches it).
8877 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8878 !isa<ConstantSDNode>(V1.getOperand(0))) {
8879 bool IsScalarToVector = true;
8880 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8881 if (!V1.getOperand(i).isUndef()) {
8882 IsScalarToVector = false;
8883 break;
8884 }
8885 if (IsScalarToVector)
8886 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8887 }
8888 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8889 DAG.getConstant(Lane, dl, MVT::i32));
8890 }
8891
8892 bool ReverseVEXT = false;
8893 unsigned Imm = 0;
8894 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8895 if (ReverseVEXT)
8896 std::swap(V1, V2);
8897 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8898 DAG.getConstant(Imm, dl, MVT::i32));
8899 }
8900
8901 if (isVREVMask(ShuffleMask, VT, 64))
8902 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8903 if (isVREVMask(ShuffleMask, VT, 32))
8904 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8905 if (isVREVMask(ShuffleMask, VT, 16))
8906 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8907
8908 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8909 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8910 DAG.getConstant(Imm, dl, MVT::i32));
8911 }
8912
8913 // Check for Neon shuffles that modify both input vectors in place.
8914 // If both results are used, i.e., if there are two shuffles with the same
8915 // source operands and with masks corresponding to both results of one of
8916 // these operations, DAG memoization will ensure that a single node is
8917 // used for both shuffles.
8918 unsigned WhichResult = 0;
8919 bool isV_UNDEF = false;
8920 if (ST->hasNEON()) {
8921 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8922 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8923 if (isV_UNDEF)
8924 V2 = V1;
8925 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8926 .getValue(WhichResult);
8927 }
8928 }
8929 if (ST->hasMVEIntegerOps()) {
8930 if (isVMOVNMask(ShuffleMask, VT, false, false))
8931 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8932 DAG.getConstant(0, dl, MVT::i32));
8933 if (isVMOVNMask(ShuffleMask, VT, true, false))
8934 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8935 DAG.getConstant(1, dl, MVT::i32));
8936 if (isVMOVNMask(ShuffleMask, VT, true, true))
8937 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8938 DAG.getConstant(1, dl, MVT::i32));
8939 }
8940
8941 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8942 // shuffles that produce a result larger than their operands with:
8943 // shuffle(concat(v1, undef), concat(v2, undef))
8944 // ->
8945 // shuffle(concat(v1, v2), undef)
8946 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8947 //
8948 // This is useful in the general case, but there are special cases where
8949 // native shuffles produce larger results: the two-result ops.
8950 //
8951 // Look through the concat when lowering them:
8952 // shuffle(concat(v1, v2), undef)
8953 // ->
8954 // concat(VZIP(v1, v2):0, :1)
8955 //
8956 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8957 SDValue SubV1 = V1->getOperand(0);
8958 SDValue SubV2 = V1->getOperand(1);
8959 EVT SubVT = SubV1.getValueType();
8960
8961 // We expect these to have been canonicalized to -1.
8962 assert(llvm::all_of(ShuffleMask, [&](int i) {
8963 return i < (int)VT.getVectorNumElements();
8964 }) && "Unexpected shuffle index into UNDEF operand!");
8965
8966 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8967 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8968 if (isV_UNDEF)
8969 SubV2 = SubV1;
8970 assert((WhichResult == 0) &&
8971 "In-place shuffle of concat can only have one result!");
8972 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8973 SubV1, SubV2);
8974 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8975 Res.getValue(1));
8976 }
8977 }
8978 }
8979
8980 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8981 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8982 return V;
8983
8984 for (bool Top : {false, true}) {
8985 for (bool SingleSource : {false, true}) {
8986 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8987 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8988 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8989 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8990 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8991 SingleSource ? V1 : V2);
8992 if (Top) {
8993 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8994 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8995 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8996 }
8997 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8998 }
8999 }
9000 }
9001 }
9002
9003 // If the shuffle is not directly supported and it has 4 elements, use
9004 // the PerfectShuffle-generated table to synthesize it from other shuffles.
9005 unsigned NumElts = VT.getVectorNumElements();
9006 if (NumElts == 4) {
9007 unsigned PFIndexes[4];
9008 for (unsigned i = 0; i != 4; ++i) {
9009 if (ShuffleMask[i] < 0)
9010 PFIndexes[i] = 8;
9011 else
9012 PFIndexes[i] = ShuffleMask[i];
9013 }
9014
9015 // Compute the index in the perfect shuffle table.
9016 unsigned PFTableIndex =
9017 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9018 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9019 unsigned Cost = (PFEntry >> 30);
9020
9021 if (Cost <= 4) {
9022 if (ST->hasNEON())
9023 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9024 else if (isLegalMVEShuffleOp(PFEntry)) {
9025 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9026 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9027 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
9028 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
9029 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
9030 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9031 }
9032 }
9033 }
9034
9035 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9036 if (EltSize >= 32) {
9037 // Do the expansion with floating-point types, since that is what the VFP
9038 // registers are defined to use, and since i64 is not legal.
9039 EVT EltVT = EVT::getFloatingPointVT(EltSize);
9040 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
9041 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
9042 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
9043 SmallVector<SDValue, 8> Ops;
9044 for (unsigned i = 0; i < NumElts; ++i) {
9045 if (ShuffleMask[i] < 0)
9046 Ops.push_back(DAG.getUNDEF(EltVT));
9047 else
9048 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
9049 ShuffleMask[i] < (int)NumElts ? V1 : V2,
9050 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9051 dl, MVT::i32)));
9052 }
9053 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9054 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9055 }
9056
9057 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9058 isReverseMask(ShuffleMask, VT))
9059 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9060
9061 if (ST->hasNEON() && VT == MVT::v8i8)
9062 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9063 return NewOp;
9064
9065 if (ST->hasMVEIntegerOps())
9066 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9067 return NewOp;
9068
9069 return SDValue();
9070}
9071
9072 static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9073 const ARMSubtarget *ST) {
9074 EVT VecVT = Op.getOperand(0).getValueType();
9075 SDLoc dl(Op);
9076
9077 assert(ST->hasMVEIntegerOps() &&
9078 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9079
9080 SDValue Conv =
9081 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9082 unsigned Lane = Op.getConstantOperandVal(2);
9083 unsigned LaneWidth =
9084 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9085 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9086 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9087 Op.getOperand(1), DAG.getValueType(MVT::i1));
9088 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9089 DAG.getConstant(~Mask, dl, MVT::i32));
9090 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9091}
9092
9093SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9094 SelectionDAG &DAG) const {
9095 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9096 SDValue Lane = Op.getOperand(2);
9097 if (!isa<ConstantSDNode>(Lane))
9098 return SDValue();
9099
9100 SDValue Elt = Op.getOperand(1);
9101 EVT EltVT = Elt.getValueType();
9102
9103 if (Subtarget->hasMVEIntegerOps() &&
9104 Op.getValueType().getScalarSizeInBits() == 1)
9105 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9106
9107 if (getTypeAction(*DAG.getContext(), EltVT) ==
9108 TargetLowering::TypePromoteFloat) {
9109 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9110 // but the type system will try to do that if we don't intervene.
9111 // Reinterpret any such vector-element insertion as one with the
9112 // corresponding integer types.
9113
9114 SDLoc dl(Op);
9115
9116 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9117 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9118 TargetLowering::TypePromoteFloat);
9119
9120 SDValue VecIn = Op.getOperand(0);
9121 EVT VecVT = VecIn.getValueType();
9122 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9123 VecVT.getVectorNumElements());
9124
9125 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9126 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9127 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9128 IVecIn, IElt, Lane);
9129 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9130 }
9131
9132 return Op;
9133}
9134
9135 static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9136 const ARMSubtarget *ST) {
9137 EVT VecVT = Op.getOperand(0).getValueType();
9138 SDLoc dl(Op);
9139
9140 assert(ST->hasMVEIntegerOps() &&
9141 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9142
9143 SDValue Conv =
9144 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9145 unsigned Lane = Op.getConstantOperandVal(1);
9146 unsigned LaneWidth =
9147 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9148 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9149 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9150 return Shift;
9151}
9152
9153 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9154 const ARMSubtarget *ST) {
9155 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9156 SDValue Lane = Op.getOperand(1);
9157 if (!isa<ConstantSDNode>(Lane))
9158 return SDValue();
9159
9160 SDValue Vec = Op.getOperand(0);
9161 EVT VT = Vec.getValueType();
9162
9163 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9164 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9165
9166 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9167 SDLoc dl(Op);
9168 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9169 }
9170
9171 return Op;
9172}
9173
9174 static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9175 const ARMSubtarget *ST) {
9176 SDLoc dl(Op);
9177 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9178 "Unexpected custom CONCAT_VECTORS lowering");
9180 "Unexpected custom CONCAT_VECTORS lowering");
9181 assert(ST->hasMVEIntegerOps() &&
9182 "CONCAT_VECTORS lowering only supported for MVE");
9183
9184 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9185 EVT Op1VT = V1.getValueType();
9186 EVT Op2VT = V2.getValueType();
9187 assert(Op1VT == Op2VT && "Operand types don't match!");
9188 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9189 "Unexpected i1 concat operations!");
9190 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9191
9192 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9193 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9194
9195 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9196 // promoted to v8i16, etc.
9197 MVT ElType =
9198 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9199 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9200
9201 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9202 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9203 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9204 // ConcatVT.
9205 SDValue ConVec =
9206 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9207 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9208 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9209 }
9210
9211 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9212 // to be the right size for the destination. For example, if Op1 is v4i1
9213 // then the promoted vector is v4i32. The result of concatenation gives a
9214 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9215 // needs truncating to i16 and inserting in the result.
9216 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9217 EVT NewVT = NewV.getValueType();
9218 EVT ConcatVT = ConVec.getValueType();
9219 unsigned ExtScale = 1;
9220 if (NewVT == MVT::v2f64) {
9221 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9222 ExtScale = 2;
9223 }
9224 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9225 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9226 DAG.getIntPtrConstant(i * ExtScale, dl));
9227 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9228 DAG.getConstant(j, dl, MVT::i32));
9229 }
9230 return ConVec;
9231 };
9232 unsigned j = 0;
9233 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9234 ConVec = ExtractInto(NewV1, ConVec, j);
9235 ConVec = ExtractInto(NewV2, ConVec, j);
9236
9237 // Now return the result of comparing the subvector with zero, which will
9238 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9239 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9240 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9241 };
9242
9243 // Concat each pair of subvectors and pack into the lower half of the array.
9244 SmallVector<SDValue> ConcatOps(Op->ops());
9245 while (ConcatOps.size() > 1) {
9246 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9247 SDValue V1 = ConcatOps[I];
9248 SDValue V2 = ConcatOps[I + 1];
9249 ConcatOps[I / 2] = ConcatPair(V1, V2);
9250 }
9251 ConcatOps.resize(ConcatOps.size() / 2);
9252 }
9253 return ConcatOps[0];
9254}
9255
9256 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9257 const ARMSubtarget *ST) {
9258 EVT VT = Op->getValueType(0);
9259 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9260 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9261
9262 // The only time a CONCAT_VECTORS operation can have legal types is when
9263 // two 64-bit vectors are concatenated to a 128-bit vector.
9264 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9265 "unexpected CONCAT_VECTORS");
9266 SDLoc dl(Op);
9267 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9268 SDValue Op0 = Op.getOperand(0);
9269 SDValue Op1 = Op.getOperand(1);
9270 if (!Op0.isUndef())
9271 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9272 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9273 DAG.getIntPtrConstant(0, dl));
9274 if (!Op1.isUndef())
9275 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9276 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9277 DAG.getIntPtrConstant(1, dl));
9278 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9279}
9280
9281 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9282 const ARMSubtarget *ST) {
9283 SDValue V1 = Op.getOperand(0);
9284 SDValue V2 = Op.getOperand(1);
9285 SDLoc dl(Op);
9286 EVT VT = Op.getValueType();
9287 EVT Op1VT = V1.getValueType();
9288 unsigned NumElts = VT.getVectorNumElements();
9289 unsigned Index = V2->getAsZExtVal();
9290
9291 assert(VT.getScalarSizeInBits() == 1 &&
9292 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9293 assert(ST->hasMVEIntegerOps() &&
9294 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9295
9296 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9297
9298 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9299 // promoted to v8i16, etc.
9300
9302
9303 if (NumElts == 2) {
9304 EVT SubVT = MVT::v4i32;
9305 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9306 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9307 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9308 DAG.getIntPtrConstant(i, dl));
9309 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9310 DAG.getConstant(j, dl, MVT::i32));
9311 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9312 DAG.getConstant(j + 1, dl, MVT::i32));
9313 }
9314 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9315 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9316 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9317 }
9318
9319 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9320 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9321 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9322 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9323 DAG.getIntPtrConstant(i, dl));
9324 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9325 DAG.getConstant(j, dl, MVT::i32));
9326 }
9327
9328 // Now return the result of comparing the subvector with zero,
9329 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9330 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9331 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9332}
9333
9334// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9335 static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9336 const ARMSubtarget *ST) {
9337 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9338 EVT VT = N->getValueType(0);
9339 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9340 "Expected a vector i1 type!");
9341 SDValue Op = N->getOperand(0);
9342 EVT FromVT = Op.getValueType();
9343 SDLoc DL(N);
9344
9345 SDValue And =
9346 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9347 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9348 DAG.getCondCode(ISD::SETNE));
9349}
9350
9351 static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9352 const ARMSubtarget *Subtarget) {
9353 if (!Subtarget->hasMVEIntegerOps())
9354 return SDValue();
9355
9356 EVT ToVT = N->getValueType(0);
9357 if (ToVT.getScalarType() == MVT::i1)
9358 return LowerTruncatei1(N, DAG, Subtarget);
9359
9360 // MVE does not have a single instruction to perform the truncation of a v4i32
9361 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9362 // Most of the instructions in MVE follow the 'Beats' system, where moving
9363 // values from different lanes is usually something that the instructions
9364 // avoid.
9365 //
9366 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9367 // which take the top/bottom half of a larger lane and extend it (or do the
9368 // opposite, truncating into the top/bottom lane from a larger lane). Note
9369 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9370 // bottom 16bits from each vector lane. This works really well with T/B
9371 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9372 // to be reordered.
9373 //
9374 // But truncates and sext/zext are always going to be fairly common from llvm.
9375 // We have several options for how to deal with them:
9376 // - Wherever possible combine them into an instruction that makes them
9377 // "free". This includes loads/stores, which can perform the trunc as part
9378 // of the memory operation. Or certain shuffles that can be turned into
9379 // VMOVN/VMOVL.
9380 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9381 // trunc(mul(sext(a), sext(b))) may become
9382 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9383 // this case can use VMULL). This is performed in the
9384 // MVELaneInterleavingPass.
9385 // - Otherwise we have an option. By default we would expand the
9386 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9387 // registers. One for each vector lane in the vector. This can obviously be
9388 // very expensive.
9389 // - The other option is to use the fact that loads/store can extend/truncate
9390 // to turn a trunc into two truncating stack stores and a stack reload. This
9391 // becomes 3 back-to-back memory operations, but at least that is less than
9392 // all the insert/extracts.
9393 //
9394 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9395 // are either optimized where they can be, or eventually lowered into stack
9396 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9397 // too early, where other instructions would be better, and stops us from
9398 // having to reconstruct multiple buildvector shuffles into loads/stores.
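// For example, a trunc of v8i32 to v8i16 is split below into its two v4i32
// halves and wrapped in an ARMISD::MVETRUNC; later combines either fold that
// into neighbouring operations or expand it through the stack as described
// above.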
9399 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9400 return SDValue();
9401 EVT FromVT = N->getOperand(0).getValueType();
9402 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9403 return SDValue();
9404
9405 SDValue Lo, Hi;
9406 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9407 SDLoc DL(N);
9408 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9409}
9410
9411 static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9412 const ARMSubtarget *Subtarget) {
9413 if (!Subtarget->hasMVEIntegerOps())
9414 return SDValue();
9415
9416 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9417
9418 EVT ToVT = N->getValueType(0);
9419 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9420 return SDValue();
9421 SDValue Op = N->getOperand(0);
9422 EVT FromVT = Op.getValueType();
9423 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9424 return SDValue();
9425
9426 SDLoc DL(N);
9427 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9428 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9429 ExtVT = MVT::v8i16;
9430
9431 unsigned Opcode =
9433 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9434 SDValue Ext1 = Ext.getValue(1);
9435
9436 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9437 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9438 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9439 }
9440
9441 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9442}
9443
9444/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9445/// element has been zero/sign-extended, depending on the isSigned parameter,
9446/// from an integer type half its size.
9447 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9448 bool isSigned) {
9449 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9450 EVT VT = N->getValueType(0);
9451 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9452 SDNode *BVN = N->getOperand(0).getNode();
9453 if (BVN->getValueType(0) != MVT::v4i32 ||
9454 BVN->getOpcode() != ISD::BUILD_VECTOR)
9455 return false;
9456 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9457 unsigned HiElt = 1 - LoElt;
9458 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9459 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9460 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9461 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9462 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9463 return false;
9464 if (isSigned) {
9465 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9466 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9467 return true;
9468 } else {
9469 if (Hi0->isZero() && Hi1->isZero())
9470 return true;
9471 }
9472 return false;
9473 }
9474
9475 if (N->getOpcode() != ISD::BUILD_VECTOR)
9476 return false;
9477
9478 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9479 SDNode *Elt = N->getOperand(i).getNode();
9480 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9481 unsigned EltSize = VT.getScalarSizeInBits();
9482 unsigned HalfSize = EltSize / 2;
9483 if (isSigned) {
9484 if (!isIntN(HalfSize, C->getSExtValue()))
9485 return false;
9486 } else {
9487 if (!isUIntN(HalfSize, C->getZExtValue()))
9488 return false;
9489 }
9490 continue;
9491 }
9492 return false;
9493 }
9494
9495 return true;
9496}
9497
9498/// isSignExtended - Check if a node is a vector value that is sign-extended
9499/// or a constant BUILD_VECTOR with sign-extended elements.
9500 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9501 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9502 return true;
9503 if (isExtendedBUILD_VECTOR(N, DAG, true))
9504 return true;
9505 return false;
9506}
9507
9508/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9509/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9510 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9511 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9512 ISD::isZEXTLoad(N))
9513 return true;
9514 if (isExtendedBUILD_VECTOR(N, DAG, false))
9515 return true;
9516 return false;
9517}
9518
9519static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9520 if (OrigVT.getSizeInBits() >= 64)
9521 return OrigVT;
9522
9523 assert(OrigVT.isSimple() && "Expecting a simple value type");
9524
9525 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9526 switch (OrigSimpleTy) {
9527 default: llvm_unreachable("Unexpected Vector Type");
9528 case MVT::v2i8:
9529 case MVT::v2i16:
9530 return MVT::v2i32;
9531 case MVT::v4i8:
9532 return MVT::v4i16;
9533 }
9534}
9535
9536/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9537/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9538/// We insert the required extension here to get the vector to fill a D register.
9539 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9540 const EVT &OrigTy,
9541 const EVT &ExtTy,
9542 unsigned ExtOpcode) {
9543 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9544 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9545 // 64-bits we need to insert a new extension so that it will be 64-bits.
9546 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9547 if (OrigTy.getSizeInBits() >= 64)
9548 return N;
9549
9550 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9551 EVT NewVT = getExtensionTo64Bits(OrigTy);
9552
9553 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9554}
9555
9556/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9557/// does not do any sign/zero extension. If the original vector is less
9558/// than 64 bits, an appropriate extension will be added after the load to
9559/// reach a total size of 64 bits. We have to add the extension separately
9560/// because ARM does not have a sign/zero extending load for vectors.
9561 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9562 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9563
9564 // The load already has the right type.
9565 if (ExtendedTy == LD->getMemoryVT())
9566 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9567 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9568 LD->getMemOperand()->getFlags());
9569
9570 // We need to create a zextload/sextload. We cannot just create a load
9571 // followed by a zext/sext node because LowerMUL is also run during normal
9572 // operation legalization where we can't create illegal types.
9573 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9574 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9575 LD->getMemoryVT(), LD->getAlign(),
9576 LD->getMemOperand()->getFlags());
9577}
9578
9579/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9580/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9581/// the unextended value. The unextended vector should be 64 bits so that it can
9582/// be used as an operand to a VMULL instruction. If the original vector size
9583 /// before extension is less than 64 bits we add an extension to resize
9584/// the vector to 64 bits.
9585 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9586 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9587 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9588 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9589 N->getOperand(0)->getValueType(0),
9590 N->getValueType(0),
9591 N->getOpcode());
9592
9593 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9594 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9595 "Expected extending load");
9596
9597 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9598 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9599 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9600 SDValue extLoad =
9601 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9602 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9603
9604 return newLoad;
9605 }
9606
9607 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9608 // have been legalized as a BITCAST from v4i32.
9609 if (N->getOpcode() == ISD::BITCAST) {
9610 SDNode *BVN = N->getOperand(0).getNode();
9611 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9612 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9613 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9614 return DAG.getBuildVector(
9615 MVT::v2i32, SDLoc(N),
9616 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9617 }
9618 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9619 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9620 EVT VT = N->getValueType(0);
9621 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9622 unsigned NumElts = VT.getVectorNumElements();
9623 MVT TruncVT = MVT::getIntegerVT(EltSize);
9624 SmallVector<SDValue, 8> Ops;
9625 SDLoc dl(N);
9626 for (unsigned i = 0; i != NumElts; ++i) {
9627 const APInt &CInt = N->getConstantOperandAPInt(i);
9628 // Element types smaller than 32 bits are not legal, so use i32 elements.
9629 // The values are implicitly truncated so sext vs. zext doesn't matter.
9630 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9631 }
9632 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9633}
9634
9635static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9636 unsigned Opcode = N->getOpcode();
9637 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9638 SDNode *N0 = N->getOperand(0).getNode();
9639 SDNode *N1 = N->getOperand(1).getNode();
9640 return N0->hasOneUse() && N1->hasOneUse() &&
9641 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9642 }
9643 return false;
9644}
9645
9646static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9647 unsigned Opcode = N->getOpcode();
9648 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9649 SDNode *N0 = N->getOperand(0).getNode();
9650 SDNode *N1 = N->getOperand(1).getNode();
9651 return N0->hasOneUse() && N1->hasOneUse() &&
9652 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9653 }
9654 return false;
9655}
9656
9657 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9658 // Multiplications are only custom-lowered for 128-bit vectors so that
9659 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
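// For example, a v4i32 mul whose operands are both sign-extended from v4i16
// can be selected as a single vmull.s16 (ARMISD::VMULLs) on the narrower
// operands.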
9660 EVT VT = Op.getValueType();
9661 assert(VT.is128BitVector() && VT.isInteger() &&
9662 "unexpected type for custom-lowering ISD::MUL");
9663 SDNode *N0 = Op.getOperand(0).getNode();
9664 SDNode *N1 = Op.getOperand(1).getNode();
9665 unsigned NewOpc = 0;
9666 bool isMLA = false;
9667 bool isN0SExt = isSignExtended(N0, DAG);
9668 bool isN1SExt = isSignExtended(N1, DAG);
9669 if (isN0SExt && isN1SExt)
9670 NewOpc = ARMISD::VMULLs;
9671 else {
9672 bool isN0ZExt = isZeroExtended(N0, DAG);
9673 bool isN1ZExt = isZeroExtended(N1, DAG);
9674 if (isN0ZExt && isN1ZExt)
9675 NewOpc = ARMISD::VMULLu;
9676 else if (isN1SExt || isN1ZExt) {
9677 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9678 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9679 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9680 NewOpc = ARMISD::VMULLs;
9681 isMLA = true;
9682 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9683 NewOpc = ARMISD::VMULLu;
9684 isMLA = true;
9685 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9686 std::swap(N0, N1);
9687 NewOpc = ARMISD::VMULLu;
9688 isMLA = true;
9689 }
9690 }
9691
9692 if (!NewOpc) {
9693 if (VT == MVT::v2i64)
9694 // Fall through to expand this. It is not legal.
9695 return SDValue();
9696 else
9697 // Other vector multiplications are legal.
9698 return Op;
9699 }
9700 }
9701
9702 // Legalize to a VMULL instruction.
9703 SDLoc DL(Op);
9704 SDValue Op0;
9705 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9706 if (!isMLA) {
9707 Op0 = SkipExtensionForVMULL(N0, DAG);
9708 assert(Op0.getValueType().is64BitVector() &&
9709 Op1.getValueType().is64BitVector() &&
9710 "unexpected types for extended operands to VMULL");
9711 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9712 }
9713
9714 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9715 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9716 // vmull q0, d4, d6
9717 // vmlal q0, d5, d6
9718 // is faster than
9719 // vaddl q0, d4, d5
9720 // vmovl q1, d6
9721 // vmul q0, q0, q1
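// i.e. mul(add(sext(a), sext(b)), sext(c)) is rebuilt below as
// add(VMULL(a, c), VMULL(b, c)), so the multiplies can be selected as
// vmull/vmlal.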
9722 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9723 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9724 EVT Op1VT = Op1.getValueType();
9725 return DAG.getNode(N0->getOpcode(), DL, VT,
9726 DAG.getNode(NewOpc, DL, VT,
9727 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9728 DAG.getNode(NewOpc, DL, VT,
9729 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9730}
9731
9732 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9733 SelectionDAG &DAG) {
9734 // TODO: Should this propagate fast-math-flags?
9735
9736 // Convert to float
9737 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9738 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9739 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9740 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9741 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9742 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9743 // Get reciprocal estimate.
9744 // float4 recip = vrecpeq_f32(yf);
9745 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9746 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9747 Y);
9748 // Because char has a smaller range than uchar, we can actually get away
9749 // without any newton steps. This requires that we use a weird bias
9750 // of 0xb000, however (again, this has been exhaustively tested).
9751 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9752 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9753 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9754 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9755 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9756 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9757 // Convert back to short.
9758 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9759 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9760 return X;
9761}
9762
9763 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9764 SelectionDAG &DAG) {
9765 // TODO: Should this propagate fast-math-flags?
9766
9767 SDValue N2;
9768 // Convert to float.
9769 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9770 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9771 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9772 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9773 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9774 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9775
9776 // Use reciprocal estimate and one refinement step.
9777 // float4 recip = vrecpeq_f32(yf);
9778 // recip *= vrecpsq_f32(yf, recip);
9779 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9780 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9781 N1);
9782 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9783 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9784 N1, N2);
9785 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9786 // Because short has a smaller range than ushort, we can actually get away
9787 // with only a single newton step. This requires that we use a weird bias
9788 // of 89, however (again, this has been exhaustively tested).
9789 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9790 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9791 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9792 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9793 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9794 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9795 // Convert back to integer and return.
9796 // return vmovn_s32(vcvt_s32_f32(result));
9797 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9798 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9799 return N0;
9800}
9801
9802 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9803 const ARMSubtarget *ST) {
9804 EVT VT = Op.getValueType();
9805 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9806 "unexpected type for custom-lowering ISD::SDIV");
9807
9808 SDLoc dl(Op);
9809 SDValue N0 = Op.getOperand(0);
9810 SDValue N1 = Op.getOperand(1);
9811 SDValue N2, N3;
9812
9813 if (VT == MVT::v8i8) {
9814 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9815 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9816
9817 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9818 DAG.getIntPtrConstant(4, dl));
9819 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9820 DAG.getIntPtrConstant(4, dl));
9821 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9822 DAG.getIntPtrConstant(0, dl));
9823 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9824 DAG.getIntPtrConstant(0, dl));
9825
9826 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9827 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9828
9829 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9830 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9831
9832 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9833 return N0;
9834 }
9835 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9836}
9837
9838 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9839 const ARMSubtarget *ST) {
9840 // TODO: Should this propagate fast-math-flags?
9841 EVT VT = Op.getValueType();
9842 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9843 "unexpected type for custom-lowering ISD::UDIV");
9844
9845 SDLoc dl(Op);
9846 SDValue N0 = Op.getOperand(0);
9847 SDValue N1 = Op.getOperand(1);
9848 SDValue N2, N3;
9849
9850 if (VT == MVT::v8i8) {
9851 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9852 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9853
9854 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9855 DAG.getIntPtrConstant(4, dl));
9856 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9857 DAG.getIntPtrConstant(4, dl));
9858 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9859 DAG.getIntPtrConstant(0, dl));
9860 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9861 DAG.getIntPtrConstant(0, dl));
9862
9863 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9864 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9865
9866 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9867 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9868
9869 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9870 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9871 MVT::i32),
9872 N0);
9873 return N0;
9874 }
9875
9876 // v4i16 udiv ... Convert to float.
9877 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9878 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9879 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9880 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9881 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9882 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9883
9884 // Use reciprocal estimate and two refinement steps.
9885 // float4 recip = vrecpeq_f32(yf);
9886 // recip *= vrecpsq_f32(yf, recip);
9887 // recip *= vrecpsq_f32(yf, recip);
9888 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9889 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9890 BN1);
9891 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9892 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9893 BN1, N2);
9894 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9895 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9896 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9897 BN1, N2);
9898 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9899 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9900 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9901 // and that it will never cause us to return an answer too large).
9902 // float4 result = as_float4(as_int4(xf*recip) + 2);
9903 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9904 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9905 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9906 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9907 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9908 // Convert back to integer and return.
9909 // return vmovn_u32(vcvt_s32_f32(result));
9910 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9911 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9912 return N0;
9913}
9914
9915static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9916 SDNode *N = Op.getNode();
9917 EVT VT = N->getValueType(0);
9918 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9919
9920 SDValue Carry = Op.getOperand(2);
9921
9922 SDLoc DL(Op);
9923
9924 SDValue Result;
9925 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9926 // This converts the boolean value carry into the carry flag.
9927 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9928
9929 // Do the addition proper using the carry flag we wanted.
9930 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9931 Op.getOperand(1), Carry);
9932
9933 // Now convert the carry flag into a boolean value.
9934 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9935 } else {
9936 // ARMISD::SUBE expects a carry, not the borrow that ISD::USUBO_CARRY
9937 // provides, so we have to invert the incoming value first.
9938 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9939 DAG.getConstant(1, DL, MVT::i32), Carry);
9940 // This converts the boolean value carry into the carry flag.
9941 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9942
9943 // Do the subtraction proper using the carry flag we wanted.
9944 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9945 Op.getOperand(1), Carry);
9946
9947 // Now convert the carry flag into a boolean value.
9948 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9949 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9950 // by ISD::USUBO_CARRY, so compute 1 - C.
9951 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9952 DAG.getConstant(1, DL, MVT::i32), Carry);
9953 }
9954
9955 // Return both values.
9956 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9957}
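// Worked example of the borrow/carry flip above (illustrative values):
// ISD::USUBO_CARRY computes a - b - borrow_in, while ARMISD::SUBE (like SBC)
// computes a - b - (1 - carry_in). So for a = 5, b = 3, borrow_in = 1 we feed
// carry_in = 1 - 1 = 0 and get 5 - 3 - 1 = 1; the borrow_out handed back to
// ISD is likewise recomputed as 1 - carry_out.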
9958
9959SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9960 assert(Subtarget->isTargetDarwin());
9961
9962 // For iOS, we want to call an alternative entry point: __sincos_stret,
9963 // return values are passed via sret.
9964 SDLoc dl(Op);
9965 SDValue Arg = Op.getOperand(0);
9966 EVT ArgVT = Arg.getValueType();
9967 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9968 auto PtrVT = getPointerTy(DAG.getDataLayout());
9969
9970 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9971 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9972
9973 // Pair of floats / doubles used to pass the result.
9974 Type *RetTy = StructType::get(ArgTy, ArgTy);
9975 auto &DL = DAG.getDataLayout();
9976
9977 ArgListTy Args;
9978 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9979 SDValue SRet;
9980 if (ShouldUseSRet) {
9981 // Create stack object for sret.
9982 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9983 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9984 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9985 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9986
9987 ArgListEntry Entry;
9988 Entry.Node = SRet;
9989 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9990 Entry.IsSExt = false;
9991 Entry.IsZExt = false;
9992 Entry.IsSRet = true;
9993 Args.push_back(Entry);
9994 RetTy = Type::getVoidTy(*DAG.getContext());
9995 }
9996
9997 ArgListEntry Entry;
9998 Entry.Node = Arg;
9999 Entry.Ty = ArgTy;
10000 Entry.IsSExt = false;
10001 Entry.IsZExt = false;
10002 Args.push_back(Entry);
10003
10004 RTLIB::Libcall LC =
10005 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
10006 const char *LibcallName = getLibcallName(LC);
10007 CallingConv::ID CC = getLibcallCallingConv(LC);
10008 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
10009
10010 TargetLowering::CallLoweringInfo CLI(DAG);
10011 CLI.setDebugLoc(dl)
10012 .setChain(DAG.getEntryNode())
10013 .setCallee(CC, RetTy, Callee, std::move(Args))
10014 .setDiscardResult(ShouldUseSRet);
10015 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10016
10017 if (!ShouldUseSRet)
10018 return CallResult.first;
10019
10020 SDValue LoadSin =
10021 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
10022
10023 // Address of cos field.
10024 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
10025 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
10026 SDValue LoadCos =
10027 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
10028
10029 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
10030 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
10031 LoadSin.getValue(0), LoadCos.getValue(0));
10032}
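// Conceptually the call built above behaves like this C-level sketch (the
// struct and variable names are hypothetical; only the __sincosf_stret /
// __sincos_stret entry points come from the libcall table):
//   struct SinCosF { float Sin, Cos; };
//   struct SinCosF __sincosf_stret(float X);
//   struct SinCosF R = __sincosf_stret(A);  // Sin at offset 0, Cos right after
// Under the APCS ABI the result is instead written through a hidden sret
// pointer on the stack and read back with the two loads above.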
10033
10034SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
10035 bool Signed,
10036 SDValue &Chain) const {
10037 EVT VT = Op.getValueType();
10038 assert((VT == MVT::i32 || VT == MVT::i64) &&
10039 "unexpected type for custom lowering DIV");
10040 SDLoc dl(Op);
10041
10042 const auto &DL = DAG.getDataLayout();
10043 const auto &TLI = DAG.getTargetLoweringInfo();
10044
10045 const char *Name = nullptr;
10046 if (Signed)
10047 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
10048 else
10049 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
10050
10051 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
10052
10053 ARMTargetLowering::ArgListTy Args;
10054
10055 for (auto AI : {1, 0}) {
10056 ArgListEntry Arg;
10057 Arg.Node = Op.getOperand(AI);
10058 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10059 Args.push_back(Arg);
10060 }
10061
10062 CallLoweringInfo CLI(DAG);
10063 CLI.setDebugLoc(dl)
10064 .setChain(Chain)
10065 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
10066 ES, std::move(Args));
10067
10068 return LowerCallTo(CLI).first;
10069}
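// Note the operands are pushed in the order {1, 0}, i.e. divisor first. A
// rough prototype of the helper being called (stated as an assumption about
// the Windows runtime, not something defined in this file):
//   int __rt_sdiv(int divisor, int dividend);  // call result used as quotient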
10070
10071// This is a code size optimisation: return the original SDIV node to
10072// DAGCombiner when we don't want to expand SDIV into a sequence of
10073// instructions, and an empty node otherwise which will cause the
10074// SDIV to be expanded in DAGCombine.
10075SDValue
10076ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10077 SelectionDAG &DAG,
10078 SmallVectorImpl<SDNode *> &Created) const {
10079 // TODO: Support SREM
10080 if (N->getOpcode() != ISD::SDIV)
10081 return SDValue();
10082
10083 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10084 const bool MinSize = ST.hasMinSize();
10085 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10086 : ST.hasDivideInARMMode();
10087
10088 // Don't touch vector types; rewriting this may lead to scalarizing
10089 // the int divs.
10090 if (N->getOperand(0).getValueType().isVector())
10091 return SDValue();
10092
10093 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10094 // hwdiv support for this to be really profitable.
10095 if (!(MinSize && HasDivide))
10096 return SDValue();
10097
10098 // ARM mode is a bit simpler than Thumb: we can handle large power
10099 // of 2 immediates with 1 mov instruction; no further checks required,
10100 // just return the sdiv node.
10101 if (!ST.isThumb())
10102 return SDValue(N, 0);
10103
10104 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV and thus
10105 // lose the code-size benefit of a 2-byte MOVS.
10106 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10107 // but as this check already does exactly that, it is not worth getting TTI.
10108 if (Divisor.sgt(128))
10109 return SDValue();
10110
10111 return SDValue(N, 0);
10112}
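// To illustrate the size trade-off (Thumb2 encodings, dividing by 16; an
// illustrative sketch, byte counts are approximate):
//   movs r1, #16               @ 2 bytes
//   sdiv r0, r0, r1            @ 4 bytes
// versus the generic power-of-two expansion:
//   asrs  r1, r0, #31          @ 2 bytes
//   add.w r0, r0, r1, lsr #28  @ 4 bytes
//   asrs  r0, r0, #4           @ 2 bytes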
10113
10114SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10115 bool Signed) const {
10116 assert(Op.getValueType() == MVT::i32 &&
10117 "unexpected type for custom lowering DIV");
10118 SDLoc dl(Op);
10119
10120 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10121 DAG.getEntryNode(), Op.getOperand(1));
10122
10123 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10124}
10125
10126static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10127 SDLoc DL(N);
10128 SDValue Op = N->getOperand(1);
10129 if (N->getValueType(0) == MVT::i32)
10130 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10131 SDValue Lo, Hi;
10132 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10133 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10134 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10135}
10136
10137void ARMTargetLowering::ExpandDIV_Windows(
10138 SDValue Op, SelectionDAG &DAG, bool Signed,
10139 SmallVectorImpl<SDValue> &Results) const {
10140 const auto &DL = DAG.getDataLayout();
10141 const auto &TLI = DAG.getTargetLoweringInfo();
10142
10143 assert(Op.getValueType() == MVT::i64 &&
10144 "unexpected type for custom lowering DIV");
10145 SDLoc dl(Op);
10146
10147 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10148
10149 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10150
10151 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10152 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10153 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10154 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10155
10156 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10157}
10158
10159static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10160 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10161 EVT MemVT = LD->getMemoryVT();
10162 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10163 MemVT == MVT::v16i1) &&
10164 "Expected a predicate type!");
10165 assert(MemVT == Op.getValueType());
10166 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10167 "Expected a non-extending load");
10168 assert(LD->isUnindexed() && "Expected an unindexed load");
10169
10170 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10171 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10172 // need to make sure that only 8/4/2 bits are actually loaded and that they
10173 // end up in the correct place, which means loading the value and then
10174 // shuffling it into the bottom bits of the predicate.
10175 // Equally, a VLDR for a v16i1 would actually load 32 bits (and so would be
10176 // incorrect for BE).
10177 // Also for BE, the rest of LLVM assumes the reverse of the order produced by
10178 // a natural VMSR(load), so the loaded value needs to be bit-reversed.
10179
10180 SDLoc dl(Op);
10181 SDValue Load = DAG.getExtLoad(
10182 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10183 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10184 LD->getMemOperand());
10185 SDValue Val = Load;
10186 if (DAG.getDataLayout().isBigEndian())
10187 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10188 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10189 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10190 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10191 if (MemVT != MVT::v16i1)
10192 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10193 DAG.getConstant(0, dl, MVT::i32));
10194 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10195}
10196
10197void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10198 SelectionDAG &DAG) const {
10199 LoadSDNode *LD = cast<LoadSDNode>(N);
10200 EVT MemVT = LD->getMemoryVT();
10201 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10202
10203 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10204 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10205 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10206 SDLoc dl(N);
10207 SDValue Result = DAG.getMemIntrinsicNode(
10208 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10209 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10210 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10211 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10212 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10213 Results.append({Pair, Result.getValue(2)});
10214 }
10215}
10216
10217static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10218 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10219 EVT MemVT = ST->getMemoryVT();
10220 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10221 MemVT == MVT::v16i1) &&
10222 "Expected a predicate type!");
10223 assert(MemVT == ST->getValue().getValueType());
10224 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10225 assert(ST->isUnindexed() && "Expected an unindexed store");
10226
10227 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10228 // top bits unset and a scalar store.
10229 SDLoc dl(Op);
10230 SDValue Build = ST->getValue();
10231 if (MemVT != MVT::v16i1) {
10232 SmallVector<SDValue, 16> Ops;
10233 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10234 unsigned Elt = DAG.getDataLayout().isBigEndian()
10235 ? MemVT.getVectorNumElements() - I - 1
10236 : I;
10237 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10238 DAG.getConstant(Elt, dl, MVT::i32)));
10239 }
10240 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10241 Ops.push_back(DAG.getUNDEF(MVT::i32));
10242 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10243 }
10244 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10245 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10246 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10247 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10248 DAG.getConstant(16, dl, MVT::i32));
10249 return DAG.getTruncStore(
10250 ST->getChain(), dl, GRP, ST->getBasePtr(),
10251 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10252 ST->getMemOperand());
10253}
10254
10255static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10256 const ARMSubtarget *Subtarget) {
10257 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10258 EVT MemVT = ST->getMemoryVT();
10259 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10260
10261 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10262 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10263 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10264 SDNode *N = Op.getNode();
10265 SDLoc dl(N);
10266
10267 SDValue Lo = DAG.getNode(
10268 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10269 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10270 MVT::i32));
10271 SDValue Hi = DAG.getNode(
10272 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10273 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10274 MVT::i32));
10275
10276 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10277 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10278 MemVT, ST->getMemOperand());
10279 } else if (Subtarget->hasMVEIntegerOps() &&
10280 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10281 MemVT == MVT::v16i1))) {
10282 return LowerPredicateStore(Op, DAG);
10283 }
10284
10285 return SDValue();
10286}
10287
10288static bool isZeroVector(SDValue N) {
10289 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10290 (N->getOpcode() == ARMISD::VMOVIMM &&
10291 isNullConstant(N->getOperand(0))));
10292}
10293
10294static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10295 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10296 MVT VT = Op.getSimpleValueType();
10297 SDValue Mask = N->getMask();
10298 SDValue PassThru = N->getPassThru();
10299 SDLoc dl(Op);
10300
10301 if (isZeroVector(PassThru))
10302 return Op;
10303
10304 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10305 // zero too, and other values are lowered to a select.
10306 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10307 DAG.getTargetConstant(0, dl, MVT::i32));
10308 SDValue NewLoad = DAG.getMaskedLoad(
10309 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10310 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10311 N->getExtensionType(), N->isExpandingLoad());
10312 SDValue Combo = NewLoad;
10313 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10314 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10315 isZeroVector(PassThru->getOperand(0));
10316 if (!PassThru.isUndef() && !PassThruIsCastZero)
10317 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10318 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10319}
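// A rough IR-level picture of this transform (types and names illustrative):
//   %x = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//                                                  <4 x i1> %m, <4 x i32> %pt)
// becomes a masked load with a zero passthru followed by a select:
//   %z = masked load of %p with mask %m and zeroinitializer passthru
//   %x = select <4 x i1> %m, <4 x i32> %z, <4 x i32> %pt
// unless %pt is undef or already a (possibly cast) zero vector.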
10320
10321static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10322 const ARMSubtarget *ST) {
10323 if (!ST->hasMVEIntegerOps())
10324 return SDValue();
10325
10326 SDLoc dl(Op);
10327 unsigned BaseOpcode = 0;
10328 switch (Op->getOpcode()) {
10329 default: llvm_unreachable("Expected VECREDUCE opcode");
10330 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10331 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10332 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10333 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10334 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10335 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10336 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10337 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10338 }
10339
10340 SDValue Op0 = Op->getOperand(0);
10341 EVT VT = Op0.getValueType();
10342 EVT EltVT = VT.getVectorElementType();
10343 unsigned NumElts = VT.getVectorNumElements();
10344 unsigned NumActiveLanes = NumElts;
10345
10346 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10347 NumActiveLanes == 2) &&
10348 "Only expected a power 2 vector size");
10349
10350 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10351 // allows us to easily extract vector elements from the lanes.
10352 while (NumActiveLanes > 4) {
10353 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10354 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10355 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10356 NumActiveLanes /= 2;
10357 }
10358
10359 SDValue Res;
10360 if (NumActiveLanes == 4) {
10361 // The remaining 4 elements are reduced sequentially.
10362 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10363 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10364 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10365 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10366 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10367 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10368 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10369 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10370 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10371 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10372 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10373 } else {
10374 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10375 DAG.getConstant(0, dl, MVT::i32));
10376 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10377 DAG.getConstant(1, dl, MVT::i32));
10378 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10379 }
10380
10381 // Result type may be wider than element type.
10382 if (EltVT != Op->getValueType(0))
10383 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10384 return Res;
10385}
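// Worked example for a v8i16 vecreduce_mul of x = [x0..x7]:
//   t = x * VREV32(x)   ; t[0] = x0*x1, t[2] = x2*x3, t[4] = x4*x5, t[6] = x6*x7
//   result = (t[0] * t[2]) * (t[4] * t[6])
// i.e. one in-vector step followed by three scalar multiplies.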
10386
10387static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10388 const ARMSubtarget *ST) {
10389 if (!ST->hasMVEFloatOps())
10390 return SDValue();
10391 return LowerVecReduce(Op, DAG, ST);
10392}
10393
10394static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10395 const ARMSubtarget *ST) {
10396 if (!ST->hasNEON())
10397 return SDValue();
10398
10399 SDLoc dl(Op);
10400 SDValue Op0 = Op->getOperand(0);
10401 EVT VT = Op0.getValueType();
10402 EVT EltVT = VT.getVectorElementType();
10403
10404 unsigned PairwiseIntrinsic = 0;
10405 switch (Op->getOpcode()) {
10406 default:
10407 llvm_unreachable("Expected VECREDUCE opcode");
10408 case ISD::VECREDUCE_UMIN:
10409 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10410 break;
10411 case ISD::VECREDUCE_UMAX:
10412 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10413 break;
10414 case ISD::VECREDUCE_SMIN:
10415 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10416 break;
10417 case ISD::VECREDUCE_SMAX:
10418 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10419 break;
10420 }
10421 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10422
10423 unsigned NumElts = VT.getVectorNumElements();
10424 unsigned NumActiveLanes = NumElts;
10425
10426 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10427 NumActiveLanes == 2) &&
10428 "Only expected a power 2 vector size");
10429
10430 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10431 if (VT.is128BitVector()) {
10432 SDValue Lo, Hi;
10433 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10434 VT = Lo.getValueType();
10435 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10436 NumActiveLanes /= 2;
10437 }
10438
10439 // Use pairwise reductions until one lane remains
10440 while (NumActiveLanes > 1) {
10441 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10442 NumActiveLanes /= 2;
10443 }
10444
10445 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10446 DAG.getConstant(0, dl, MVT::i32));
10447
10448 // Result type may be wider than element type.
10449 if (EltVT != Op.getValueType()) {
10450 unsigned Extend = 0;
10451 switch (Op->getOpcode()) {
10452 default:
10453 llvm_unreachable("Expected VECREDUCE opcode");
10454 case ISD::VECREDUCE_UMIN:
10455 case ISD::VECREDUCE_UMAX:
10456 Extend = ISD::ZERO_EXTEND;
10457 break;
10458 case ISD::VECREDUCE_SMIN:
10459 case ISD::VECREDUCE_SMAX:
10460 Extend = ISD::SIGN_EXTEND;
10461 break;
10462 }
10463 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10464 }
10465 return Res;
10466}
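// Worked example for a v4i32 vecreduce_umin of x = [x0,x1,x2,x3]:
//   d = vpmin.u32(x.lo, x.hi)   ; [min(x0,x1), min(x2,x3)]
//   d = vpmin.u32(d, d)         ; [min(x0..x3), min(x0..x3)]
//   result = d[0]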
10467
10468static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10469 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10470 // Acquire/Release load/store is not legal for targets without a dmb or
10471 // equivalent available.
10472 return SDValue();
10473
10474 // Monotonic load/store is legal for all targets.
10475 return Op;
10476}
10477
10478static void ReplaceREADCYCLECOUNTER(SDNode *N,
10479 SmallVectorImpl<SDValue> &Results,
10480 SelectionDAG &DAG,
10481 const ARMSubtarget *Subtarget) {
10482 SDLoc DL(N);
10483 // Under Power Management extensions, the cycle-count is:
10484 // mrc p15, #0, <Rt>, c9, c13, #0
10485 SDValue Ops[] = { N->getOperand(0), // Chain
10486 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10487 DAG.getTargetConstant(15, DL, MVT::i32),
10488 DAG.getTargetConstant(0, DL, MVT::i32),
10489 DAG.getTargetConstant(9, DL, MVT::i32),
10490 DAG.getTargetConstant(13, DL, MVT::i32),
10491 DAG.getTargetConstant(0, DL, MVT::i32)
10492 };
10493
10494 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10495 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10496 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10497 DAG.getConstant(0, DL, MVT::i32)));
10498 Results.push_back(Cycles32.getValue(1));
10499}
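// Roughly equivalent to reading PMCCNTR directly and zero-extending it (an
// illustrative snippet, not part of this lowering):
//   uint32_t Cycles;
//   __asm__ volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles));
//   uint64_t Result = Cycles;   // the upper 32 bits are returned as zero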
10500
10501static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
10502 SDValue V1) {
10503 SDLoc dl(V0.getNode());
10504 SDValue RegClass =
10505 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10506 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10507 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10508 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10509 return SDValue(
10510 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10511}
10512
10513static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
10514 SDLoc dl(V.getNode());
10515 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10516 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10517 if (isBigEndian)
10518 std::swap(VLo, VHi);
10519 return createGPRPairNode2xi32(DAG, VLo, VHi);
10520}
10521
10522static void ReplaceCMP_SWAP_64Results(SDNode *N,
10523 SmallVectorImpl<SDValue> &Results,
10524 SelectionDAG &DAG) {
10525 assert(N->getValueType(0) == MVT::i64 &&
10526 "AtomicCmpSwap on types less than 64 should be legal");
10527 SDValue Ops[] = {
10528 createGPRPairNode2xi32(DAG, N->getOperand(1),
10529 DAG.getUNDEF(MVT::i32)), // pointer, temp
10530 createGPRPairNodei64(DAG, N->getOperand(2)), // expected
10531 createGPRPairNodei64(DAG, N->getOperand(3)), // new
10532 N->getOperand(0), // chain in
10533 };
10534 SDNode *CmpSwap = DAG.getMachineNode(
10535 ARM::CMP_SWAP_64, SDLoc(N),
10536 DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
10537
10538 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10539 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10540
10541 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10542
10543 SDValue Lo =
10544 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10545 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10546 SDValue Hi =
10547 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10548 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10549 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10550 Results.push_back(SDValue(CmpSwap, 2));
10551}
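// A 64-bit compare-and-swap such as (illustrative IR):
//   %r = cmpxchg ptr %p, i64 %exp, i64 %new seq_cst seq_cst
// is selected to the CMP_SWAP_64 pseudo above; its GPRPair operands are later
// expanded by the pseudo-expansion pass into an ldrexd/strexd retry loop.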
10552
10553SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10554 SDLoc dl(Op);
10555 EVT VT = Op.getValueType();
10556 SDValue Chain = Op.getOperand(0);
10557 SDValue LHS = Op.getOperand(1);
10558 SDValue RHS = Op.getOperand(2);
10559 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10560 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10561
10562 // If we don't have instructions of this float type then soften to a libcall
10563 // and use SETCC instead.
10564 if (isUnsupportedFloatingType(LHS.getValueType())) {
10565 softenSetCCOperands(
10566 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10567 if (!RHS.getNode()) {
10568 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10569 CC = ISD::SETNE;
10570 }
10571 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10572 DAG.getCondCode(CC));
10573 return DAG.getMergeValues({Result, Chain}, dl);
10574 }
10575
10576 ARMCC::CondCodes CondCode, CondCode2;
10577 FPCCToARMCC(CC, CondCode, CondCode2);
10578
10579 SDValue True = DAG.getConstant(1, dl, VT);
10580 SDValue False = DAG.getConstant(0, dl, VT);
10581 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10582 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10583 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10584 if (CondCode2 != ARMCC::AL) {
10585 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10586 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10587 }
10588 return DAG.getMergeValues({Result, Chain}, dl);
10589}
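// For example, an unordered-or-equal compare (SETUEQ) typically maps to
// CondCode = EQ with CondCode2 = VS (the exact pair comes from FPCCToARMCC and
// is stated here as an assumption), so the boolean is built roughly as:
//   r = 0; if (flags.EQ) r = 1; if (flags.VS) r = 1;
// i.e. two CMOVs over a single VFP compare.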
10590
10591SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10592 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10593
10594 EVT VT = getPointerTy(DAG.getDataLayout());
10595 SDLoc DL(Op);
10596 int FI = MFI.CreateFixedObject(4, 0, false);
10597 return DAG.getFrameIndex(FI, VT);
10598}
10599
10600SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
10601 SelectionDAG &DAG) const {
10602 SDLoc DL(Op);
10603 MakeLibCallOptions CallOptions;
10604 MVT SVT = Op.getOperand(0).getSimpleValueType();
10605 RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
10606 SDValue Res =
10607 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
10608 return DAG.getBitcast(MVT::i32, Res);
10609}
10610
10611SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10612 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10613 switch (Op.getOpcode()) {
10614 default: llvm_unreachable("Don't know how to custom lower this!");
10615 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10616 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10617 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10618 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10619 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10620 case ISD::SELECT: return LowerSELECT(Op, DAG);
10621 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10622 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10623 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10624 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10625 case ISD::VASTART: return LowerVASTART(Op, DAG);
10626 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10627 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10628 case ISD::SINT_TO_FP:
10629 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10630 case ISD::STRICT_FP_TO_SINT:
10631 case ISD::STRICT_FP_TO_UINT:
10632 case ISD::FP_TO_SINT:
10633 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10634 case ISD::FP_TO_SINT_SAT:
10635 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10636 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10637 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10638 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10639 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10640 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10641 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10642 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10643 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10644 Subtarget);
10645 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10646 case ISD::SHL:
10647 case ISD::SRL:
10648 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10649 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10650 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10651 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10652 case ISD::SRL_PARTS:
10653 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10654 case ISD::CTTZ:
10655 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10656 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10657 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10658 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10659 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10660 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10661 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10662 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10663 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10664 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10665 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10666 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10667 case ISD::SIGN_EXTEND:
10668 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10669 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10670 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10671 case ISD::SET_FPMODE:
10672 return LowerSET_FPMODE(Op, DAG);
10673 case ISD::RESET_FPMODE:
10674 return LowerRESET_FPMODE(Op, DAG);
10675 case ISD::MUL: return LowerMUL(Op, DAG);
10676 case ISD::SDIV:
10677 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10678 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10679 return LowerSDIV(Op, DAG, Subtarget);
10680 case ISD::UDIV:
10681 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10682 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10683 return LowerUDIV(Op, DAG, Subtarget);
10684 case ISD::UADDO_CARRY:
10685 case ISD::USUBO_CARRY:
10686 return LowerUADDSUBO_CARRY(Op, DAG);
10687 case ISD::SADDO:
10688 case ISD::SSUBO:
10689 return LowerSignedALUO(Op, DAG);
10690 case ISD::UADDO:
10691 case ISD::USUBO:
10692 return LowerUnsignedALUO(Op, DAG);
10693 case ISD::SADDSAT:
10694 case ISD::SSUBSAT:
10695 case ISD::UADDSAT:
10696 case ISD::USUBSAT:
10697 return LowerADDSUBSAT(Op, DAG, Subtarget);
10698 case ISD::LOAD:
10699 return LowerPredicateLoad(Op, DAG);
10700 case ISD::STORE:
10701 return LowerSTORE(Op, DAG, Subtarget);
10702 case ISD::MLOAD:
10703 return LowerMLOAD(Op, DAG);
10704 case ISD::VECREDUCE_MUL:
10705 case ISD::VECREDUCE_AND:
10706 case ISD::VECREDUCE_OR:
10707 case ISD::VECREDUCE_XOR:
10708 return LowerVecReduce(Op, DAG, Subtarget);
10709 case ISD::VECREDUCE_FADD:
10710 case ISD::VECREDUCE_FMUL:
10711 case ISD::VECREDUCE_FMIN:
10712 case ISD::VECREDUCE_FMAX:
10713 return LowerVecReduceF(Op, DAG, Subtarget);
10714 case ISD::VECREDUCE_UMIN:
10715 case ISD::VECREDUCE_UMAX:
10716 case ISD::VECREDUCE_SMIN:
10717 case ISD::VECREDUCE_SMAX:
10718 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10719 case ISD::ATOMIC_LOAD:
10720 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10721 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10722 case ISD::SDIVREM:
10723 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10724 case ISD::DYNAMIC_STACKALLOC:
10725 if (Subtarget->isTargetWindows())
10726 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10727 llvm_unreachable("Don't know how to custom lower this!");
10728 case ISD::STRICT_FP_ROUND:
10729 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10730 case ISD::STRICT_FP_EXTEND:
10731 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10732 case ISD::STRICT_FSETCC:
10733 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10734 case ISD::SPONENTRY:
10735 return LowerSPONENTRY(Op, DAG);
10736 case ISD::FP_TO_BF16:
10737 return LowerFP_TO_BF16(Op, DAG);
10738 case ARMISD::WIN__DBZCHK: return SDValue();
10739 }
10740}
10741
10742static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10743 SelectionDAG &DAG) {
10744 unsigned IntNo = N->getConstantOperandVal(0);
10745 unsigned Opc = 0;
10746 if (IntNo == Intrinsic::arm_smlald)
10747 Opc = ARMISD::SMLALD;
10748 else if (IntNo == Intrinsic::arm_smlaldx)
10749 Opc = ARMISD::SMLALDX;
10750 else if (IntNo == Intrinsic::arm_smlsld)
10751 Opc = ARMISD::SMLSLD;
10752 else if (IntNo == Intrinsic::arm_smlsldx)
10753 Opc = ARMISD::SMLSLDX;
10754 else
10755 return;
10756
10757 SDLoc dl(N);
10758 SDValue Lo, Hi;
10759 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10760
10761 SDValue LongMul = DAG.getNode(Opc, dl,
10762 DAG.getVTList(MVT::i32, MVT::i32),
10763 N->getOperand(1), N->getOperand(2),
10764 Lo, Hi);
10765 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10766 LongMul.getValue(0), LongMul.getValue(1)));
10767}
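// For example (illustrative IR):
//   %r = call i64 @llvm.arm.smlald(i32 %a, i32 %b, i64 %acc)
// is replaced by splitting %acc into two i32 halves, emitting ARMISD::SMLALD
// on them, and rebuilding the illegal i64 result with BUILD_PAIR.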
10768
10769/// ReplaceNodeResults - Replace the results of node with an illegal result
10770/// type with new values built out of custom code.
10771void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10772 SmallVectorImpl<SDValue> &Results,
10773 SelectionDAG &DAG) const {
10774 SDValue Res;
10775 switch (N->getOpcode()) {
10776 default:
10777 llvm_unreachable("Don't know how to custom expand this!");
10778 case ISD::READ_REGISTER:
10779 ExpandREAD_REGISTER(N, Results, DAG);
10780 break;
10781 case ISD::BITCAST:
10782 Res = ExpandBITCAST(N, DAG, Subtarget);
10783 break;
10784 case ISD::SRL:
10785 case ISD::SRA:
10786 case ISD::SHL:
10787 Res = Expand64BitShift(N, DAG, Subtarget);
10788 break;
10789 case ISD::SREM:
10790 case ISD::UREM:
10791 Res = LowerREM(N, DAG);
10792 break;
10793 case ISD::SDIVREM:
10794 case ISD::UDIVREM:
10795 Res = LowerDivRem(SDValue(N, 0), DAG);
10796 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10797 Results.push_back(Res.getValue(0));
10798 Results.push_back(Res.getValue(1));
10799 return;
10800 case ISD::SADDSAT:
10801 case ISD::SSUBSAT:
10802 case ISD::UADDSAT:
10803 case ISD::USUBSAT:
10804 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10805 break;
10806 case ISD::READCYCLECOUNTER:
10807 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10808 return;
10809 case ISD::UDIV:
10810 case ISD::SDIV:
10811 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10812 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10813 Results);
10814 case ISD::ATOMIC_CMP_SWAP:
10815 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10816 return;
10817 case ISD::INTRINSIC_WO_CHAIN:
10818 return ReplaceLongIntrinsic(N, Results, DAG);
10819 case ISD::LOAD:
10820 LowerLOAD(N, Results, DAG);
10821 break;
10822 case ISD::TRUNCATE:
10823 Res = LowerTruncate(N, DAG, Subtarget);
10824 break;
10825 case ISD::SIGN_EXTEND:
10826 case ISD::ZERO_EXTEND:
10827 Res = LowerVectorExtend(N, DAG, Subtarget);
10828 break;
10829 case ISD::FP_TO_SINT_SAT:
10830 case ISD::FP_TO_UINT_SAT:
10831 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10832 break;
10833 }
10834 if (Res.getNode())
10835 Results.push_back(Res);
10836}
10837
10838//===----------------------------------------------------------------------===//
10839// ARM Scheduler Hooks
10840//===----------------------------------------------------------------------===//
10841
10842/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10843/// registers the function context.
10844void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10845 MachineBasicBlock *MBB,
10846 MachineBasicBlock *DispatchBB,
10847 int FI) const {
10848 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10849 "ROPI/RWPI not currently supported with SjLj");
10850 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10851 DebugLoc dl = MI.getDebugLoc();
10852 MachineFunction *MF = MBB->getParent();
10853 MachineRegisterInfo *MRI = &MF->getRegInfo();
10854 MachineConstantPool *MCP = MF->getConstantPool();
10855 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10856 const Function &F = MF->getFunction();
10857
10858 bool isThumb = Subtarget->isThumb();
10859 bool isThumb2 = Subtarget->isThumb2();
10860
10861 unsigned PCLabelId = AFI->createPICLabelUId();
10862 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10863 ARMConstantPoolValue *CPV =
10864 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10865 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10866
10867 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10868 : &ARM::GPRRegClass;
10869
10870 // Grab constant pool and fixed stack memory operands.
10871 MachineMemOperand *CPMMO =
10872 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10873 MachineMemOperand::MOLoad, 4, Align(4));
10874
10875 MachineMemOperand *FIMMOSt =
10876 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10877 MachineMemOperand::MOStore, 4, Align(4));
10878
10879 // Load the address of the dispatch MBB into the jump buffer.
10880 if (isThumb2) {
10881 // Incoming value: jbuf
10882 // ldr.n r5, LCPI1_1
10883 // orr r5, r5, #1
10884 // add r5, pc
10885 // str r5, [$jbuf, #+4] ; &jbuf[1]
10886 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10887 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10889 .addMemOperand(CPMMO)
10891 // Set the low bit because of thumb mode.
10892 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10893 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10894 .addReg(NewVReg1, RegState::Kill)
10895 .addImm(0x01)
10897 .add(condCodeOp());
10898 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10899 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10900 .addReg(NewVReg2, RegState::Kill)
10901 .addImm(PCLabelId);
10902 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10903 .addReg(NewVReg3, RegState::Kill)
10904 .addFrameIndex(FI)
10905 .addImm(36) // &jbuf[1] :: pc
10906 .addMemOperand(FIMMOSt)
10908 } else if (isThumb) {
10909 // Incoming value: jbuf
10910 // ldr.n r1, LCPI1_4
10911 // add r1, pc
10912 // mov r2, #1
10913 // orrs r1, r2
10914 // add r2, $jbuf, #+4 ; &jbuf[1]
10915 // str r1, [r2]
10916 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10917 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10919 .addMemOperand(CPMMO)
10921 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10922 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10923 .addReg(NewVReg1, RegState::Kill)
10924 .addImm(PCLabelId);
10925 // Set the low bit because of thumb mode.
10926 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10927 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10928 .addReg(ARM::CPSR, RegState::Define)
10929 .addImm(1)
10931 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10932 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10933 .addReg(ARM::CPSR, RegState::Define)
10934 .addReg(NewVReg2, RegState::Kill)
10935 .addReg(NewVReg3, RegState::Kill)
10937 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10938 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10939 .addFrameIndex(FI)
10940 .addImm(36); // &jbuf[1] :: pc
10941 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10942 .addReg(NewVReg4, RegState::Kill)
10943 .addReg(NewVReg5, RegState::Kill)
10944 .addImm(0)
10945 .addMemOperand(FIMMOSt)
10947 } else {
10948 // Incoming value: jbuf
10949 // ldr r1, LCPI1_1
10950 // add r1, pc, r1
10951 // str r1, [$jbuf, #+4] ; &jbuf[1]
10952 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10953 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10955 .addImm(0)
10956 .addMemOperand(CPMMO)
10958 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10959 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10960 .addReg(NewVReg1, RegState::Kill)
10961 .addImm(PCLabelId)
10963 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10964 .addReg(NewVReg2, RegState::Kill)
10965 .addFrameIndex(FI)
10966 .addImm(36) // &jbuf[1] :: pc
10967 .addMemOperand(FIMMOSt)
10969 }
10970}
10971
10972void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10973 MachineBasicBlock *MBB) const {
10974 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10975 DebugLoc dl = MI.getDebugLoc();
10976 MachineFunction *MF = MBB->getParent();
10977 MachineRegisterInfo *MRI = &MF->getRegInfo();
10978 MachineFrameInfo &MFI = MF->getFrameInfo();
10979 int FI = MFI.getFunctionContextIndex();
10980
10981 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10982 : &ARM::GPRnopcRegClass;
10983
10984 // Get a mapping of the call site numbers to all of the landing pads they're
10985 // associated with.
10987 unsigned MaxCSNum = 0;
10988 for (MachineBasicBlock &BB : *MF) {
10989 if (!BB.isEHPad())
10990 continue;
10991
10992 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10993 // pad.
10994 for (MachineInstr &II : BB) {
10995 if (!II.isEHLabel())
10996 continue;
10997
10998 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10999 if (!MF->hasCallSiteLandingPad(Sym)) continue;
11000
11001 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
11002 for (unsigned Idx : CallSiteIdxs) {
11003 CallSiteNumToLPad[Idx].push_back(&BB);
11004 MaxCSNum = std::max(MaxCSNum, Idx);
11005 }
11006 break;
11007 }
11008 }
11009
11010 // Get an ordered list of the machine basic blocks for the jump table.
11011 std::vector<MachineBasicBlock*> LPadList;
11013 LPadList.reserve(CallSiteNumToLPad.size());
11014 for (unsigned I = 1; I <= MaxCSNum; ++I) {
11015 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
11016 for (MachineBasicBlock *MBB : MBBList) {
11017 LPadList.push_back(MBB);
11018 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
11019 }
11020 }
11021
11022 assert(!LPadList.empty() &&
11023 "No landing pad destinations for the dispatch jump table!");
11024
11025 // Create the jump table and associated information.
11026 MachineJumpTableInfo *JTI =
11027 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
11028 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
11029
11030 // Create the MBBs for the dispatch code.
11031
11032 // Shove the dispatch's address into the return slot in the function context.
11033 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
11034 DispatchBB->setIsEHPad();
11035
11036 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11037 unsigned trap_opcode;
11038 if (Subtarget->isThumb())
11039 trap_opcode = ARM::tTRAP;
11040 else
11041 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
11042
11043 BuildMI(TrapBB, dl, TII->get(trap_opcode));
11044 DispatchBB->addSuccessor(TrapBB);
11045
11046 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11047 DispatchBB->addSuccessor(DispContBB);
11048
11049 // Insert the new MBBs into the function.
11050 MF->insert(MF->end(), DispatchBB);
11051 MF->insert(MF->end(), DispContBB);
11052 MF->insert(MF->end(), TrapBB);
11053
11054 // Insert code into the entry block that creates and registers the function
11055 // context.
11056 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11057
11058 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11061
11063 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11064
11065 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11066 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11067
11068 // Add a register mask with no preserved registers. This results in all
11069 // registers being marked as clobbered. This can't work if the dispatch block
11070 // is in a Thumb1 function and is linked with ARM code which uses the FP
11071 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11073
11074 bool IsPositionIndependent = isPositionIndependent();
11075 unsigned NumLPads = LPadList.size();
11076 if (Subtarget->isThumb2()) {
11077 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11078 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11079 .addFrameIndex(FI)
11080 .addImm(4)
11081 .addMemOperand(FIMMOLd)
11083
11084 if (NumLPads < 256) {
11085 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11086 .addReg(NewVReg1)
11087 .addImm(LPadList.size())
11089 } else {
11090 Register VReg1 = MRI->createVirtualRegister(TRC);
11091 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11092 .addImm(NumLPads & 0xFFFF)
11094
11095 unsigned VReg2 = VReg1;
11096 if ((NumLPads & 0xFFFF0000) != 0) {
11097 VReg2 = MRI->createVirtualRegister(TRC);
11098 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11099 .addReg(VReg1)
11100 .addImm(NumLPads >> 16)
11102 }
11103
11104 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11105 .addReg(NewVReg1)
11106 .addReg(VReg2)
11108 }
11109
11110 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11111 .addMBB(TrapBB)
11113 .addReg(ARM::CPSR);
11114
11115 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11116 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11117 .addJumpTableIndex(MJTI)
11119
11120 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11121 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11122 .addReg(NewVReg3, RegState::Kill)
11123 .addReg(NewVReg1)
11126 .add(condCodeOp());
11127
11128 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11129 .addReg(NewVReg4, RegState::Kill)
11130 .addReg(NewVReg1)
11131 .addJumpTableIndex(MJTI);
11132 } else if (Subtarget->isThumb()) {
11133 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11134 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11135 .addFrameIndex(FI)
11136 .addImm(1)
11137 .addMemOperand(FIMMOLd)
11139
11140 if (NumLPads < 256) {
11141 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11142 .addReg(NewVReg1)
11143 .addImm(NumLPads)
11145 } else {
11146 MachineConstantPool *ConstantPool = MF->getConstantPool();
11147 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11148 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11149
11150 // MachineConstantPool wants an explicit alignment.
11151 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11152 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11153
11154 Register VReg1 = MRI->createVirtualRegister(TRC);
11155 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11156 .addReg(VReg1, RegState::Define)
11159 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11160 .addReg(NewVReg1)
11161 .addReg(VReg1)
11163 }
11164
11165 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11166 .addMBB(TrapBB)
11168 .addReg(ARM::CPSR);
11169
11170 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11171 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11172 .addReg(ARM::CPSR, RegState::Define)
11173 .addReg(NewVReg1)
11174 .addImm(2)
11176
11177 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11178 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11179 .addJumpTableIndex(MJTI)
11181
11182 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11183 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11184 .addReg(ARM::CPSR, RegState::Define)
11185 .addReg(NewVReg2, RegState::Kill)
11186 .addReg(NewVReg3)
11188
11189 MachineMemOperand *JTMMOLd =
11190 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11192
11193 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11194 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11195 .addReg(NewVReg4, RegState::Kill)
11196 .addImm(0)
11197 .addMemOperand(JTMMOLd)
11199
11200 unsigned NewVReg6 = NewVReg5;
11201 if (IsPositionIndependent) {
11202 NewVReg6 = MRI->createVirtualRegister(TRC);
11203 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11204 .addReg(ARM::CPSR, RegState::Define)
11205 .addReg(NewVReg5, RegState::Kill)
11206 .addReg(NewVReg3)
11208 }
11209
11210 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11211 .addReg(NewVReg6, RegState::Kill)
11212 .addJumpTableIndex(MJTI);
11213 } else {
11214 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11215 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11216 .addFrameIndex(FI)
11217 .addImm(4)
11218 .addMemOperand(FIMMOLd)
11220
11221 if (NumLPads < 256) {
11222 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11223 .addReg(NewVReg1)
11224 .addImm(NumLPads)
11226 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11227 Register VReg1 = MRI->createVirtualRegister(TRC);
11228 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11229 .addImm(NumLPads & 0xFFFF)
11231
11232 unsigned VReg2 = VReg1;
11233 if ((NumLPads & 0xFFFF0000) != 0) {
11234 VReg2 = MRI->createVirtualRegister(TRC);
11235 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11236 .addReg(VReg1)
11237 .addImm(NumLPads >> 16)
11239 }
11240
11241 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11242 .addReg(NewVReg1)
11243 .addReg(VReg2)
11245 } else {
11246 MachineConstantPool *ConstantPool = MF->getConstantPool();
11247 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11248 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11249
11250 // MachineConstantPool wants an explicit alignment.
11251 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11252 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11253
11254 Register VReg1 = MRI->createVirtualRegister(TRC);
11255 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11256 .addReg(VReg1, RegState::Define)
11258 .addImm(0)
11260 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11261 .addReg(NewVReg1)
11262 .addReg(VReg1, RegState::Kill)
11264 }
11265
11266 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11267 .addMBB(TrapBB)
11269 .addReg(ARM::CPSR);
11270
11271 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11272 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11273 .addReg(NewVReg1)
11276 .add(condCodeOp());
11277 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11278 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11279 .addJumpTableIndex(MJTI)
11281
11282 MachineMemOperand *JTMMOLd =
11283 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11285 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11286 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11287 .addReg(NewVReg3, RegState::Kill)
11288 .addReg(NewVReg4)
11289 .addImm(0)
11290 .addMemOperand(JTMMOLd)
11292
11293 if (IsPositionIndependent) {
11294 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11295 .addReg(NewVReg5, RegState::Kill)
11296 .addReg(NewVReg4)
11297 .addJumpTableIndex(MJTI);
11298 } else {
11299 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11300 .addReg(NewVReg5, RegState::Kill)
11301 .addJumpTableIndex(MJTI);
11302 }
11303 }
11304
11305 // Add the jump table entries as successors to the MBB.
11307 for (MachineBasicBlock *CurMBB : LPadList) {
11308 if (SeenMBBs.insert(CurMBB).second)
11309 DispContBB->addSuccessor(CurMBB);
11310 }
11311
11312 // N.B. the order the invoke BBs are processed in doesn't matter here.
11313 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11315 for (MachineBasicBlock *BB : InvokeBBs) {
11316
11317 // Remove the landing pad successor from the invoke block and replace it
11318 // with the new dispatch block.
11319 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11320 while (!Successors.empty()) {
11321 MachineBasicBlock *SMBB = Successors.pop_back_val();
11322 if (SMBB->isEHPad()) {
11323 BB->removeSuccessor(SMBB);
11324 MBBLPads.push_back(SMBB);
11325 }
11326 }
11327
11328 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11329 BB->normalizeSuccProbs();
11330
11331 // Find the invoke call and mark all of the callee-saved registers as
11332 // 'implicitly defined' so that they're spilled. This prevents instructions
11333 // from being moved to before the EH block, where they would never be
11334 // executed.
11336 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11337 if (!II->isCall()) continue;
11338
11341 OI = II->operands_begin(), OE = II->operands_end();
11342 OI != OE; ++OI) {
11343 if (!OI->isReg()) continue;
11344 DefRegs[OI->getReg()] = true;
11345 }
11346
11347 MachineInstrBuilder MIB(*MF, &*II);
11348
11349 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11350 unsigned Reg = SavedRegs[i];
11351 if (Subtarget->isThumb2() &&
11352 !ARM::tGPRRegClass.contains(Reg) &&
11353 !ARM::hGPRRegClass.contains(Reg))
11354 continue;
11355 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11356 continue;
11357 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11358 continue;
11359 if (!DefRegs[Reg])
11361 }
11362
11363 break;
11364 }
11365 }
11366
11367 // Mark all former landing pads as non-landing pads. The dispatch is the only
11368 // landing pad now.
11369 for (MachineBasicBlock *MBBLPad : MBBLPads)
11370 MBBLPad->setIsEHPad(false);
11371
11372 // The instruction is gone now.
11373 MI.eraseFromParent();
11374}
11375
11376static
11378 for (MachineBasicBlock *S : MBB->successors())
11379 if (S != Succ)
11380 return S;
11381 llvm_unreachable("Expecting a BB with two successors!");
11382}
11383
11384/// Return the load opcode for a given load size. If load size >= 8,
11385/// a NEON opcode will be returned.
11386static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11387 if (LdSize >= 8)
11388 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11389 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11390 if (IsThumb1)
11391 return LdSize == 4 ? ARM::tLDRi
11392 : LdSize == 2 ? ARM::tLDRHi
11393 : LdSize == 1 ? ARM::tLDRBi : 0;
11394 if (IsThumb2)
11395 return LdSize == 4 ? ARM::t2LDR_POST
11396 : LdSize == 2 ? ARM::t2LDRH_POST
11397 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11398 return LdSize == 4 ? ARM::LDR_POST_IMM
11399 : LdSize == 2 ? ARM::LDRH_POST
11400 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11401}
11402
11403/// Return the store opcode for a given store size. If store size >= 8,
11404/// a NEON opcode will be returned.
11405static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11406 if (StSize >= 8)
11407 return StSize == 16 ? ARM::VST1q32wb_fixed
11408 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11409 if (IsThumb1)
11410 return StSize == 4 ? ARM::tSTRi
11411 : StSize == 2 ? ARM::tSTRHi
11412 : StSize == 1 ? ARM::tSTRBi : 0;
11413 if (IsThumb2)
11414 return StSize == 4 ? ARM::t2STR_POST
11415 : StSize == 2 ? ARM::t2STRH_POST
11416 : StSize == 1 ? ARM::t2STRB_POST : 0;
11417 return StSize == 4 ? ARM::STR_POST_IMM
11418 : StSize == 2 ? ARM::STRH_POST
11419 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11420}
11421
11422/// Emit a post-increment load operation with given size. The instructions
11423/// will be added to BB at Pos.
11425 const TargetInstrInfo *TII, const DebugLoc &dl,
11426 unsigned LdSize, unsigned Data, unsigned AddrIn,
11427 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11428 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11429 assert(LdOpc != 0 && "Should have a load opcode");
11430 if (LdSize >= 8) {
11431 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11432 .addReg(AddrOut, RegState::Define)
11433 .addReg(AddrIn)
11434 .addImm(0)
11436 } else if (IsThumb1) {
11437 // load + update AddrIn
11438 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11439 .addReg(AddrIn)
11440 .addImm(0)
11442 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11443 .add(t1CondCodeOp())
11444 .addReg(AddrIn)
11445 .addImm(LdSize)
11446 .add(predOps(ARMCC::AL));
11447 } else if (IsThumb2) {
11448 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11449 .addReg(AddrOut, RegState::Define)
11450 .addReg(AddrIn)
11451 .addImm(LdSize)
11452 .add(predOps(ARMCC::AL));
11453 } else { // arm
11454 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11455 .addReg(AddrOut, RegState::Define)
11456 .addReg(AddrIn)
11457 .addReg(0)
11458 .addImm(LdSize)
11459 .add(predOps(ARMCC::AL));
11460 }
11461}
11462
11463/// Emit a post-increment store operation with given size. The instructions
11464/// will be added to BB at Pos.
11465 static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11466 const TargetInstrInfo *TII, const DebugLoc &dl,
11467 unsigned StSize, unsigned Data, unsigned AddrIn,
11468 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11469 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11470 assert(StOpc != 0 && "Should have a store opcode");
11471 if (StSize >= 8) {
11472 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11473 .addReg(AddrIn)
11474 .addImm(0)
11475 .addReg(Data)
11476 .add(predOps(ARMCC::AL));
11477 } else if (IsThumb1) {
11478 // store + update AddrIn
11479 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11480 .addReg(Data)
11481 .addReg(AddrIn)
11482 .addImm(0)
11483 .add(predOps(ARMCC::AL));
11484 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11485 .add(t1CondCodeOp())
11486 .addReg(AddrIn)
11487 .addImm(StSize)
11488 .add(predOps(ARMCC::AL));
11489 } else if (IsThumb2) {
11490 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11491 .addReg(Data)
11492 .addReg(AddrIn)
11493 .addImm(StSize)
11494 .add(predOps(ARMCC::AL));
11495 } else { // arm
11496 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11497 .addReg(Data)
11498 .addReg(AddrIn)
11499 .addReg(0)
11500 .addImm(StSize)
11501 .add(predOps(ARMCC::AL));
11502 }
11503}
11504
11505 MachineBasicBlock *
11506 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11507 MachineBasicBlock *BB) const {
11508 // This pseudo instruction has 4 operands: dst, src, size, alignment.
11509 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11510 // Otherwise, we will generate unrolled scalar copies.
11511 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11512 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11513 MachineFunction::iterator It = ++BB->getIterator();
11514
11515 Register dest = MI.getOperand(0).getReg();
11516 Register src = MI.getOperand(1).getReg();
11517 unsigned SizeVal = MI.getOperand(2).getImm();
11518 unsigned Alignment = MI.getOperand(3).getImm();
11519 DebugLoc dl = MI.getDebugLoc();
11520
11521 MachineFunction *MF = BB->getParent();
11522 MachineRegisterInfo &MRI = MF->getRegInfo();
11523 unsigned UnitSize = 0;
11524 const TargetRegisterClass *TRC = nullptr;
11525 const TargetRegisterClass *VecTRC = nullptr;
11526
11527 bool IsThumb1 = Subtarget->isThumb1Only();
11528 bool IsThumb2 = Subtarget->isThumb2();
11529 bool IsThumb = Subtarget->isThumb();
11530
11531 if (Alignment & 1) {
11532 UnitSize = 1;
11533 } else if (Alignment & 2) {
11534 UnitSize = 2;
11535 } else {
11536 // Check whether we can use NEON instructions.
11537 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11538 Subtarget->hasNEON()) {
11539 if ((Alignment % 16 == 0) && SizeVal >= 16)
11540 UnitSize = 16;
11541 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11542 UnitSize = 8;
11543 }
11544 // Can't use NEON instructions.
11545 if (UnitSize == 0)
11546 UnitSize = 4;
11547 }
11548
11549 // Select the correct opcode and register class for unit size load/store
11550 bool IsNeon = UnitSize >= 8;
11551 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11552 if (IsNeon)
11553 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11554 : UnitSize == 8 ? &ARM::DPRRegClass
11555 : nullptr;
11556
11557 unsigned BytesLeft = SizeVal % UnitSize;
11558 unsigned LoopSize = SizeVal - BytesLeft;
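// For example (illustrative numbers only), a 70-byte copy with UnitSize == 16
// gives LoopSize == 64, i.e. four 16-byte NEON copies, and BytesLeft == 6 to
// be handled byte-by-byte.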
11559
11560 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11561 // Use LDR and STR to copy.
11562 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11563 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11564 unsigned srcIn = src;
11565 unsigned destIn = dest;
11566 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11567 Register srcOut = MRI.createVirtualRegister(TRC);
11568 Register destOut = MRI.createVirtualRegister(TRC);
11569 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11570 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11571 IsThumb1, IsThumb2);
11572 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11573 IsThumb1, IsThumb2);
11574 srcIn = srcOut;
11575 destIn = destOut;
11576 }
11577
11578 // Handle the leftover bytes with LDRB and STRB.
11579 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11580 // [destOut] = STRB_POST(scratch, destIn, 1)
11581 for (unsigned i = 0; i < BytesLeft; i++) {
11582 Register srcOut = MRI.createVirtualRegister(TRC);
11583 Register destOut = MRI.createVirtualRegister(TRC);
11584 Register scratch = MRI.createVirtualRegister(TRC);
11585 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11586 IsThumb1, IsThumb2);
11587 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11588 IsThumb1, IsThumb2);
11589 srcIn = srcOut;
11590 destIn = destOut;
11591 }
11592 MI.eraseFromParent(); // The instruction is gone now.
11593 return BB;
11594 }
11595
11596 // Expand the pseudo op to a loop.
11597 // thisMBB:
11598 // ...
11599 // movw varEnd, # --> with thumb2
11600 // movt varEnd, #
11601 // ldrcp varEnd, idx --> without thumb2
11602 // fallthrough --> loopMBB
11603 // loopMBB:
11604 // PHI varPhi, varEnd, varLoop
11605 // PHI srcPhi, src, srcLoop
11606 // PHI destPhi, dst, destLoop
11607 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11608 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11609 // subs varLoop, varPhi, #UnitSize
11610 // bne loopMBB
11611 // fallthrough --> exitMBB
11612 // exitMBB:
11613 // epilogue to handle left-over bytes
11614 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11615 // [destOut] = STRB_POST(scratch, destLoop, 1)
11616 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11617 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11618 MF->insert(It, loopMBB);
11619 MF->insert(It, exitMBB);
11620
11621 // Set the call frame size on entry to the new basic blocks.
11622 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11623 loopMBB->setCallFrameSize(CallFrameSize);
11624 exitMBB->setCallFrameSize(CallFrameSize);
11625
11626 // Transfer the remainder of BB and its successor edges to exitMBB.
11627 exitMBB->splice(exitMBB->begin(), BB,
11628 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11629 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11630
11631 // Load an immediate to varEnd.
11632 Register varEnd = MRI.createVirtualRegister(TRC);
11633 if (Subtarget->useMovt()) {
11634 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11635 varEnd)
11636 .addImm(LoopSize);
11637 } else if (Subtarget->genExecuteOnly()) {
11638 assert(IsThumb && "Non-thumb expected to have used movt");
11639 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11640 } else {
11641 MachineConstantPool *ConstantPool = MF->getConstantPool();
11642 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11643 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11644
11645 // MachineConstantPool wants an explicit alignment.
11646 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11647 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11648 MachineMemOperand *CPMMO =
11649 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11650 MachineMemOperand::MOLoad, 4, Align(4));
11651
11652 if (IsThumb)
11653 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11654 .addReg(varEnd, RegState::Define)
11655 .addConstantPoolIndex(Idx)
11656 .add(predOps(ARMCC::AL))
11657 .addMemOperand(CPMMO);
11658 else
11659 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11660 .addReg(varEnd, RegState::Define)
11661 .addConstantPoolIndex(Idx)
11662 .addImm(0)
11663 .add(predOps(ARMCC::AL))
11664 .addMemOperand(CPMMO);
11665 }
11666 BB->addSuccessor(loopMBB);
11667
11668 // Generate the loop body:
11669 // varPhi = PHI(varLoop, varEnd)
11670 // srcPhi = PHI(srcLoop, src)
11671 // destPhi = PHI(destLoop, dst)
11672 MachineBasicBlock *entryBB = BB;
11673 BB = loopMBB;
11674 Register varLoop = MRI.createVirtualRegister(TRC);
11675 Register varPhi = MRI.createVirtualRegister(TRC);
11676 Register srcLoop = MRI.createVirtualRegister(TRC);
11677 Register srcPhi = MRI.createVirtualRegister(TRC);
11678 Register destLoop = MRI.createVirtualRegister(TRC);
11679 Register destPhi = MRI.createVirtualRegister(TRC);
11680
11681 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11682 .addReg(varLoop).addMBB(loopMBB)
11683 .addReg(varEnd).addMBB(entryBB);
11684 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11685 .addReg(srcLoop).addMBB(loopMBB)
11686 .addReg(src).addMBB(entryBB);
11687 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11688 .addReg(destLoop).addMBB(loopMBB)
11689 .addReg(dest).addMBB(entryBB);
11690
11691 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11692 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11693 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11694 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11695 IsThumb1, IsThumb2);
11696 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11697 IsThumb1, IsThumb2);
11698
11699 // Decrement loop variable by UnitSize.
11700 if (IsThumb1) {
11701 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11702 .add(t1CondCodeOp())
11703 .addReg(varPhi)
11704 .addImm(UnitSize)
11705 .add(predOps(ARMCC::AL));
11706 } else {
11707 MachineInstrBuilder MIB =
11708 BuildMI(*BB, BB->end(), dl,
11709 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11710 MIB.addReg(varPhi)
11711 .addImm(UnitSize)
11712 .add(predOps(ARMCC::AL))
11713 .add(condCodeOp());
11714 MIB->getOperand(5).setReg(ARM::CPSR);
11715 MIB->getOperand(5).setIsDef(true);
11716 }
11717 BuildMI(*BB, BB->end(), dl,
11718 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11719 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11720
11721 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11722 BB->addSuccessor(loopMBB);
11723 BB->addSuccessor(exitMBB);
11724
11725 // Add epilogue to handle BytesLeft.
11726 BB = exitMBB;
11727 auto StartOfExit = exitMBB->begin();
11728
11729 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11730 // [destOut] = STRB_POST(scratch, destLoop, 1)
11731 unsigned srcIn = srcLoop;
11732 unsigned destIn = destLoop;
11733 for (unsigned i = 0; i < BytesLeft; i++) {
11734 Register srcOut = MRI.createVirtualRegister(TRC);
11735 Register destOut = MRI.createVirtualRegister(TRC);
11736 Register scratch = MRI.createVirtualRegister(TRC);
11737 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11738 IsThumb1, IsThumb2);
11739 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11740 IsThumb1, IsThumb2);
11741 srcIn = srcOut;
11742 destIn = destOut;
11743 }
11744
11745 MI.eraseFromParent(); // The instruction is gone now.
11746 return BB;
11747}
11748
11749 MachineBasicBlock *
11750 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11751 MachineBasicBlock *MBB) const {
11752 const TargetMachine &TM = getTargetMachine();
11753 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11754 DebugLoc DL = MI.getDebugLoc();
11755
11756 assert(Subtarget->isTargetWindows() &&
11757 "__chkstk is only supported on Windows");
11758 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11759
11760 // __chkstk takes the number of words to allocate on the stack in R4, and
11761 // returns the stack adjustment in number of bytes in R4. This will not
11762 // clobber any other registers (other than the obvious lr).
11763 //
11764 // Although, technically, IP should be considered a register which may be
11765 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11766 // thumb-2 environment, so there is no interworking required. As a result, we
11767 // do not expect a veneer to be emitted by the linker, clobbering IP.
11768 //
11769 // Each module receives its own copy of __chkstk, so no import thunk is
11770 // required, again, ensuring that IP is not clobbered.
11771 //
11772 // Finally, although some linkers may theoretically provide a trampoline for
11773 // out of range calls (which is quite common due to a 32M range limitation of
11774 // branches for Thumb), we can generate the long-call version via
11775 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11776 // IP.
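//
// As a rough sketch of the resulting sequence (not a verbatim listing), the
// small/medium/kernel code models produce:
//   ; R4 already holds the number of words to allocate
//   bl    __chkstk          ; returns the byte adjustment in R4
//   sub.w sp, sp, r4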
11777
11778 switch (TM.getCodeModel()) {
11779 case CodeModel::Tiny:
11780 llvm_unreachable("Tiny code model not available on ARM.");
11781 case CodeModel::Small:
11782 case CodeModel::Medium:
11783 case CodeModel::Kernel:
11784 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11785 .add(predOps(ARMCC::AL))
11786 .addExternalSymbol("__chkstk")
11787 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11788 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11789 .addReg(ARM::R12,
11790 RegState::Implicit | RegState::Define | RegState::Dead)
11791 .addReg(ARM::CPSR,
11792 RegState::Implicit | RegState::Define | RegState::Dead);
11793 break;
11794 case CodeModel::Large: {
11795 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11796 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11797
11798 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11799 .addExternalSymbol("__chkstk");
11800 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11801 .add(predOps(ARMCC::AL))
11802 .addReg(Reg, RegState::Kill)
11803 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11804 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11805 .addReg(ARM::R12,
11806 RegState::Implicit | RegState::Define | RegState::Dead)
11807 .addReg(ARM::CPSR,
11808 RegState::Implicit | RegState::Define | RegState::Dead);
11809 break;
11810 }
11811 }
11812
11813 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11814 .addReg(ARM::SP, RegState::Kill)
11815 .addReg(ARM::R4, RegState::Kill)
11816 .setMIFlags(MachineInstr::FrameSetup)
11817 .add(predOps(ARMCC::AL))
11818 .add(condCodeOp());
11819
11820 MI.eraseFromParent();
11821 return MBB;
11822}
11823
11824 MachineBasicBlock *
11825 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11826 MachineBasicBlock *MBB) const {
11827 DebugLoc DL = MI.getDebugLoc();
11828 MachineFunction *MF = MBB->getParent();
11829 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11830
11831 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11832 MF->insert(++MBB->getIterator(), ContBB);
11833 ContBB->splice(ContBB->begin(), MBB,
11834 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11835 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11836 MBB->addSuccessor(ContBB);
11837
11838 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11839 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11840 MF->push_back(TrapBB);
11841 MBB->addSuccessor(TrapBB);
11842
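// The emitted check has roughly this shape (register chosen for illustration):
//   cmp r0, #0              ; operand 0 of WIN__DBZCHK
//   beq <TrapBB>            ; TrapBB contains the __brkdiv0 trap
//   ...                     ; otherwise execution continues in ContBB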
11843 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11844 .addReg(MI.getOperand(0).getReg())
11845 .addImm(0)
11846 .add(predOps(ARMCC::AL));
11847 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11848 .addMBB(TrapBB)
11849 .addImm(ARMCC::EQ)
11850 .addReg(ARM::CPSR);
11851
11852 MI.eraseFromParent();
11853 return ContBB;
11854}
11855
11856// The CPSR operand of SelectItr might be missing a kill marker
11857// because there were multiple uses of CPSR, and ISel didn't know
11858// which to mark. Figure out whether SelectItr should have had a
11859// kill marker, and set it if it should. Returns the correct kill
11860// marker value.
11861 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11862 MachineBasicBlock* BB,
11863 const TargetRegisterInfo* TRI) {
11864 // Scan forward through BB for a use/def of CPSR.
11865 MachineBasicBlock::iterator miI(std::next(SelectItr));
11866 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11867 const MachineInstr& mi = *miI;
11868 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11869 return false;
11870 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11871 break; // Should have kill-flag - update below.
11872 }
11873
11874 // If we hit the end of the block, check whether CPSR is live into a
11875 // successor.
11876 if (miI == BB->end()) {
11877 for (MachineBasicBlock *Succ : BB->successors())
11878 if (Succ->isLiveIn(ARM::CPSR))
11879 return false;
11880 }
11881
11882 // We found a def, or hit the end of the basic block and CPSR wasn't live
11883 // out. SelectMI should have a kill flag on CPSR.
11884 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11885 return true;
11886}
11887
11888/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11889 /// t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11890 static Register genTPEntry(MachineBasicBlock *TpEntry,
11891 MachineBasicBlock *TpLoopBody,
11892 MachineBasicBlock *TpExit, Register OpSizeReg,
11893 const TargetInstrInfo *TII, DebugLoc Dl,
11894 MachineRegisterInfo &MRI) {
11895 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
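// For example, n = 37 gives (37 + 15) >> 4 = 3 iterations: two full 16-byte
// iterations plus a final iteration tail-predicated to the remaining 5 bytes.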
11896 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11897 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11898 .addUse(OpSizeReg)
11899 .addImm(15)
11900 .add(predOps(ARMCC::AL))
11901 .addReg(0);
11902
11903 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11904 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11905 .addUse(AddDestReg, RegState::Kill)
11906 .addImm(4)
11907 .add(predOps(ARMCC::AL))
11908 .addReg(0);
11909
11910 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11911 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11912 .addUse(LsrDestReg, RegState::Kill);
11913
11914 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11915 .addUse(TotalIterationsReg)
11916 .addMBB(TpExit);
11917
11918 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11919 .addMBB(TpLoopBody)
11920 .add(predOps(ARMCC::AL));
11921
11922 return TotalIterationsReg;
11923}
11924
11925/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11926/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11927/// loops.
11928static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11929 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11930 const TargetInstrInfo *TII, DebugLoc Dl,
11931 MachineRegisterInfo &MRI, Register OpSrcReg,
11932 Register OpDestReg, Register ElementCountReg,
11933 Register TotalIterationsReg, bool IsMemcpy) {
11934 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11935 // array, loop iteration counter, predication counter.
11936
11937 Register SrcPhiReg, CurrSrcReg;
11938 if (IsMemcpy) {
11939 // Current position in the src array
11940 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11941 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11942 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11943 .addUse(OpSrcReg)
11944 .addMBB(TpEntry)
11945 .addUse(CurrSrcReg)
11946 .addMBB(TpLoopBody);
11947 }
11948
11949 // Current position in the dest array
11950 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11951 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11952 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11953 .addUse(OpDestReg)
11954 .addMBB(TpEntry)
11955 .addUse(CurrDestReg)
11956 .addMBB(TpLoopBody);
11957
11958 // Current loop counter
11959 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11960 Register RemainingLoopIterationsReg =
11961 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11962 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11963 .addUse(TotalIterationsReg)
11964 .addMBB(TpEntry)
11965 .addUse(RemainingLoopIterationsReg)
11966 .addMBB(TpLoopBody);
11967
11968 // Predication counter
11969 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11970 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11971 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11972 .addUse(ElementCountReg)
11973 .addMBB(TpEntry)
11974 .addUse(RemainingElementsReg)
11975 .addMBB(TpLoopBody);
11976
11977 // Pass predication counter to VCTP
11978 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11979 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11980 .addUse(PredCounterPhiReg)
11981 .addImm(ARMVCC::None)
11982 .addReg(0)
11983 .addReg(0);
11984
11985 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11986 .addUse(PredCounterPhiReg)
11987 .addImm(16)
11988 .add(predOps(ARMCC::AL))
11989 .addReg(0);
11990
11991 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11992 Register SrcValueReg;
11993 if (IsMemcpy) {
11994 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11995 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11996 .addDef(CurrSrcReg)
11997 .addDef(SrcValueReg)
11998 .addReg(SrcPhiReg)
11999 .addImm(16)
12000 .addImm(ARMVCC::Then)
12001 .addUse(VccrReg)
12002 .addReg(0);
12003 } else
12004 SrcValueReg = OpSrcReg;
12005
12006 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
12007 .addDef(CurrDestReg)
12008 .addUse(SrcValueReg)
12009 .addReg(DestPhiReg)
12010 .addImm(16)
12011 .addImm(ARMVCC::Then)
12012 .addUse(VccrReg)
12013 .addReg(0);
12014
12015 // Add the pseudoInstrs for decrementing the loop counter and marking the
12016 // end: t2DoLoopDec and t2DoLoopEnd
12017 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
12018 .addUse(LoopCounterPhiReg)
12019 .addImm(1);
12020
12021 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
12022 .addUse(RemainingLoopIterationsReg)
12023 .addMBB(TpLoopBody);
12024
12025 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
12026 .addMBB(TpExit)
12027 .add(predOps(ARMCC::AL));
12028}
12029
12030 MachineBasicBlock *
12031 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12032 MachineBasicBlock *BB) const {
12033 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
12034 DebugLoc dl = MI.getDebugLoc();
12035 bool isThumb2 = Subtarget->isThumb2();
12036 switch (MI.getOpcode()) {
12037 default: {
12038 MI.print(errs());
12039 llvm_unreachable("Unexpected instr type to insert");
12040 }
12041
12042 // Thumb1 post-indexed loads are really just single-register LDMs.
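// For example, a post-indexed word load such as "ldr r0, [r1], #4" behaves
// exactly like "ldm r1!, {r0}", which is the form Thumb1 can actually encode.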
12043 case ARM::tLDR_postidx: {
12044 MachineOperand Def(MI.getOperand(1));
12045 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12046 .add(Def) // Rn_wb
12047 .add(MI.getOperand(2)) // Rn
12048 .add(MI.getOperand(3)) // PredImm
12049 .add(MI.getOperand(4)) // PredReg
12050 .add(MI.getOperand(0)) // Rt
12051 .cloneMemRefs(MI);
12052 MI.eraseFromParent();
12053 return BB;
12054 }
12055
12056 case ARM::MVE_MEMCPYLOOPINST:
12057 case ARM::MVE_MEMSETLOOPINST: {
12058
12059 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
12060 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
12061 // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
12062 // adds the relevant instructions in the TP loop Body for generation of a
12063 // WLSTP loop.
12064
12065 // Below is relevant portion of the CFG after the transformation.
12066 // The Machine Basic Blocks are shown along with branch conditions (in
12067 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12068 // portion of the CFG and may not necessarily be the entry/exit of the
12069 // function.
12070
12071 // (Relevant) CFG after transformation:
12072 // TP entry MBB
12073 // |
12074 // |-----------------|
12075 // (n <= 0) (n > 0)
12076 // | |
12077 // | TP loop Body MBB<--|
12078 // | | |
12079 // \ |___________|
12080 // \ /
12081 // TP exit MBB
12082
12083 MachineFunction *MF = BB->getParent();
12084 MachineFunctionProperties &Properties = MF->getProperties();
12085 MachineRegisterInfo &MRI = MF->getRegInfo();
12086
12087 Register OpDestReg = MI.getOperand(0).getReg();
12088 Register OpSrcReg = MI.getOperand(1).getReg();
12089 Register OpSizeReg = MI.getOperand(2).getReg();
12090
12091 // Allocate the required MBBs and add to parent function.
12092 MachineBasicBlock *TpEntry = BB;
12093 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12094 MachineBasicBlock *TpExit;
12095
12096 MF->push_back(TpLoopBody);
12097
12098 // If any instructions are present in the current block after
12099 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12100 // move the instructions into the newly created exit block. If there are no
12101 // instructions add an explicit branch to the FallThrough block and then
12102 // split.
12103 //
12104 // The split is required for two reasons:
12105 // 1) A terminator (t2WhileLoopStart) will be placed at that site.
12106 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12107 // need to be updated. splitAt() already handles this.
12108 TpExit = BB->splitAt(MI, false);
12109 if (TpExit == BB) {
12110 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12111 "block containing memcpy/memset Pseudo");
12112 TpExit = BB->getFallThrough();
12113 BuildMI(BB, dl, TII->get(ARM::t2B))
12114 .addMBB(TpExit)
12115 .add(predOps(ARMCC::AL));
12116 TpExit = BB->splitAt(MI, false);
12117 }
12118
12119 // Add logic for iteration count
12120 Register TotalIterationsReg =
12121 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12122
12123 // Add the vectorized (and predicated) loads/store instructions
12124 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12125 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12126 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12127
12128 // Required to avoid conflict with the MachineVerifier during testing.
12129 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
12130
12131 // Connect the blocks
12132 TpEntry->addSuccessor(TpLoopBody);
12133 TpLoopBody->addSuccessor(TpLoopBody);
12134 TpLoopBody->addSuccessor(TpExit);
12135
12136 // Reorder for a more natural layout
12137 TpLoopBody->moveAfter(TpEntry);
12138 TpExit->moveAfter(TpLoopBody);
12139
12140 // Finally, remove the memcpy Pseudo Instruction
12141 MI.eraseFromParent();
12142
12143 // Return the exit block as it may contain other instructions requiring a
12144 // custom inserter
12145 return TpExit;
12146 }
12147
12148 // The Thumb2 pre-indexed stores have the same MI operands; they just
12149 // define them differently in the .td files from the isel patterns, so
12150 // they need pseudos.
12151 case ARM::t2STR_preidx:
12152 MI.setDesc(TII->get(ARM::t2STR_PRE));
12153 return BB;
12154 case ARM::t2STRB_preidx:
12155 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12156 return BB;
12157 case ARM::t2STRH_preidx:
12158 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12159 return BB;
12160
12161 case ARM::STRi_preidx:
12162 case ARM::STRBi_preidx: {
12163 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12164 : ARM::STRB_PRE_IMM;
12165 // Decode the offset.
12166 unsigned Offset = MI.getOperand(4).getImm();
12167 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12168 Offset = ARM_AM::getAM2Offset(Offset);
12169 if (isSub)
12170 Offset = -Offset;
12171
12172 MachineMemOperand *MMO = *MI.memoperands_begin();
12173 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12174 .add(MI.getOperand(0)) // Rn_wb
12175 .add(MI.getOperand(1)) // Rt
12176 .add(MI.getOperand(2)) // Rn
12177 .addImm(Offset) // offset (skip GPR==zero_reg)
12178 .add(MI.getOperand(5)) // pred
12179 .add(MI.getOperand(6))
12180 .addMemOperand(MMO);
12181 MI.eraseFromParent();
12182 return BB;
12183 }
12184 case ARM::STRr_preidx:
12185 case ARM::STRBr_preidx:
12186 case ARM::STRH_preidx: {
12187 unsigned NewOpc;
12188 switch (MI.getOpcode()) {
12189 default: llvm_unreachable("unexpected opcode!");
12190 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12191 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12192 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12193 }
12194 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12195 for (const MachineOperand &MO : MI.operands())
12196 MIB.add(MO);
12197 MI.eraseFromParent();
12198 return BB;
12199 }
12200
12201 case ARM::tMOVCCr_pseudo: {
12202 // To "insert" a SELECT_CC instruction, we actually have to insert the
12203 // diamond control-flow pattern. The incoming instruction knows the
12204 // destination vreg to set, the condition code register to branch on, the
12205 // true/false values to select between, and a branch opcode to use.
12206 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12207 MachineFunction::iterator It = ++BB->getIterator();
12208
12209 // thisMBB:
12210 // ...
12211 // TrueVal = ...
12212 // cmpTY ccX, r1, r2
12213 // bCC copy1MBB
12214 // fallthrough --> copy0MBB
12215 MachineBasicBlock *thisMBB = BB;
12216 MachineFunction *F = BB->getParent();
12217 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12218 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12219 F->insert(It, copy0MBB);
12220 F->insert(It, sinkMBB);
12221
12222 // Set the call frame size on entry to the new basic blocks.
12223 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12224 copy0MBB->setCallFrameSize(CallFrameSize);
12225 sinkMBB->setCallFrameSize(CallFrameSize);
12226
12227 // Check whether CPSR is live past the tMOVCCr_pseudo.
12228 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12229 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12230 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12231 copy0MBB->addLiveIn(ARM::CPSR);
12232 sinkMBB->addLiveIn(ARM::CPSR);
12233 }
12234
12235 // Transfer the remainder of BB and its successor edges to sinkMBB.
12236 sinkMBB->splice(sinkMBB->begin(), BB,
12237 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12238 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12239
12240 BB->addSuccessor(copy0MBB);
12241 BB->addSuccessor(sinkMBB);
12242
12243 BuildMI(BB, dl, TII->get(ARM::tBcc))
12244 .addMBB(sinkMBB)
12245 .addImm(MI.getOperand(3).getImm())
12246 .addReg(MI.getOperand(4).getReg());
12247
12248 // copy0MBB:
12249 // %FalseValue = ...
12250 // # fallthrough to sinkMBB
12251 BB = copy0MBB;
12252
12253 // Update machine-CFG edges
12254 BB->addSuccessor(sinkMBB);
12255
12256 // sinkMBB:
12257 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12258 // ...
12259 BB = sinkMBB;
12260 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12261 .addReg(MI.getOperand(1).getReg())
12262 .addMBB(copy0MBB)
12263 .addReg(MI.getOperand(2).getReg())
12264 .addMBB(thisMBB);
12265
12266 MI.eraseFromParent(); // The pseudo instruction is gone now.
12267 return BB;
12268 }
12269
12270 case ARM::BCCi64:
12271 case ARM::BCCZi64: {
12272 // If there is an unconditional branch to the other successor, remove it.
12273 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12274
12275 // Compare both parts that make up the double comparison separately for
12276 // equality.
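// For example, with the i64 LHS in r0:r1 and RHS in r2:r3 (an illustrative
// allocation), this becomes:
//   cmp   r0, r2
//   cmpeq r1, r3
//   beq   destMBB
// so the branch is taken only when both halves compare equal.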
12277 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12278
12279 Register LHS1 = MI.getOperand(1).getReg();
12280 Register LHS2 = MI.getOperand(2).getReg();
12281 if (RHSisZero) {
12282 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12283 .addReg(LHS1)
12284 .addImm(0)
12285 .add(predOps(ARMCC::AL));
12286 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12287 .addReg(LHS2).addImm(0)
12288 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12289 } else {
12290 Register RHS1 = MI.getOperand(3).getReg();
12291 Register RHS2 = MI.getOperand(4).getReg();
12292 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12293 .addReg(LHS1)
12294 .addReg(RHS1)
12295 .add(predOps(ARMCC::AL));
12296 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12297 .addReg(LHS2).addReg(RHS2)
12298 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12299 }
12300
12301 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12302 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12303 if (MI.getOperand(0).getImm() == ARMCC::NE)
12304 std::swap(destMBB, exitMBB);
12305
12306 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12307 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12308 if (isThumb2)
12309 BuildMI(BB, dl, TII->get(ARM::t2B))
12310 .addMBB(exitMBB)
12311 .add(predOps(ARMCC::AL));
12312 else
12313 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12314
12315 MI.eraseFromParent(); // The pseudo instruction is gone now.
12316 return BB;
12317 }
12318
12319 case ARM::Int_eh_sjlj_setjmp:
12320 case ARM::Int_eh_sjlj_setjmp_nofp:
12321 case ARM::tInt_eh_sjlj_setjmp:
12322 case ARM::t2Int_eh_sjlj_setjmp:
12323 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12324 return BB;
12325
12326 case ARM::Int_eh_sjlj_setup_dispatch:
12327 EmitSjLjDispatchBlock(MI, BB);
12328 return BB;
12329
12330 case ARM::ABS:
12331 case ARM::t2ABS: {
12332 // To insert an ABS instruction, we have to insert the
12333 // diamond control-flow pattern. The incoming instruction knows the
12334 // source vreg to test against 0, the destination vreg to set,
12335 // the condition code register to branch on, the
12336 // true/false values to select between, and a branch opcode to use.
12337 // It transforms
12338 // V1 = ABS V0
12339 // into
12340 // V2 = MOVS V0
12341 // BCC (branch to SinkBB if V0 >= 0)
12342 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12343 // SinkBB: V1 = PHI(V2, V3)
12344 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12345 MachineFunction::iterator BBI = ++BB->getIterator();
12346 MachineFunction *Fn = BB->getParent();
12347 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12348 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12349 Fn->insert(BBI, RSBBB);
12350 Fn->insert(BBI, SinkBB);
12351
12352 Register ABSSrcReg = MI.getOperand(1).getReg();
12353 Register ABSDstReg = MI.getOperand(0).getReg();
12354 bool ABSSrcKill = MI.getOperand(1).isKill();
12355 bool isThumb2 = Subtarget->isThumb2();
12356 MachineRegisterInfo &MRI = Fn->getRegInfo();
12357 // In Thumb mode S must not be specified if source register is the SP or
12358 // PC and if destination register is the SP, so restrict register class
12359 Register NewRsbDstReg = MRI.createVirtualRegister(
12360 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12361
12362 // Transfer the remainder of BB and its successor edges to sinkMBB.
12363 SinkBB->splice(SinkBB->begin(), BB,
12364 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12365 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12366
12367 BB->addSuccessor(RSBBB);
12368 BB->addSuccessor(SinkBB);
12369
12370 // fall through to SinkMBB
12371 RSBBB->addSuccessor(SinkBB);
12372
12373 // insert a cmp at the end of BB
12374 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12375 .addReg(ABSSrcReg)
12376 .addImm(0)
12377 .add(predOps(ARMCC::AL));
12378
12379 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12380 BuildMI(BB, dl,
12381 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12382 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12383
12384 // insert rsbri in RSBBB
12385 // Note: BCC and rsbri will be converted into predicated rsbmi
12386 // by the if-conversion pass
12387 BuildMI(*RSBBB, RSBBB->begin(), dl,
12388 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12389 .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
12390 .addImm(0)
12391 .add(predOps(ARMCC::AL))
12392 .add(condCodeOp());
12393
12394 // insert PHI in SinkBB,
12395 // reuse ABSDstReg to not change uses of ABS instruction
12396 BuildMI(*SinkBB, SinkBB->begin(), dl,
12397 TII->get(ARM::PHI), ABSDstReg)
12398 .addReg(NewRsbDstReg).addMBB(RSBBB)
12399 .addReg(ABSSrcReg).addMBB(BB);
12400
12401 // remove ABS instruction
12402 MI.eraseFromParent();
12403
12404 // return last added BB
12405 return SinkBB;
12406 }
12407 case ARM::COPY_STRUCT_BYVAL_I32:
12408 ++NumLoopByVals;
12409 return EmitStructByval(MI, BB);
12410 case ARM::WIN__CHKSTK:
12411 return EmitLowered__chkstk(MI, BB);
12412 case ARM::WIN__DBZCHK:
12413 return EmitLowered__dbzchk(MI, BB);
12414 }
12415}
12416
12417/// Attaches vregs to MEMCPY that it will use as scratch registers
12418/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12419/// instead of as a custom inserter because we need the use list from the SDNode.
12420static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12421 MachineInstr &MI, const SDNode *Node) {
12422 bool isThumb1 = Subtarget->isThumb1Only();
12423
12424 DebugLoc DL = MI.getDebugLoc();
12425 MachineFunction *MF = MI.getParent()->getParent();
12426 MachineRegisterInfo &MRI = MF->getRegInfo();
12427 MachineInstrBuilder MIB(*MF, MI);
12428
12429 // If the new dst/src is unused mark it as dead.
12430 if (!Node->hasAnyUseOfValue(0)) {
12431 MI.getOperand(0).setIsDead(true);
12432 }
12433 if (!Node->hasAnyUseOfValue(1)) {
12434 MI.getOperand(1).setIsDead(true);
12435 }
12436
12437 // The MEMCPY both defines and kills the scratch registers.
12438 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12439 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12440 : &ARM::GPRRegClass);
12441 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12442 }
12443}
12444
12445 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12446 SDNode *Node) const {
12447 if (MI.getOpcode() == ARM::MEMCPY) {
12448 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12449 return;
12450 }
12451
12452 const MCInstrDesc *MCID = &MI.getDesc();
12453 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12454 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12455 // operand is still set to noreg. If needed, set the optional operand's
12456 // register to CPSR, and remove the redundant implicit def.
12457 //
12458 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12459
12460 // Rename pseudo opcodes.
12461 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12462 unsigned ccOutIdx;
12463 if (NewOpc) {
12464 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12465 MCID = &TII->get(NewOpc);
12466
12467 assert(MCID->getNumOperands() ==
12468 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12469 && "converted opcode should be the same except for cc_out"
12470 " (and, on Thumb1, pred)");
12471
12472 MI.setDesc(*MCID);
12473
12474 // Add the optional cc_out operand
12475 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12476
12477 // On Thumb1, move all input operands to the end, then add the predicate
12478 if (Subtarget->isThumb1Only()) {
12479 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12480 MI.addOperand(MI.getOperand(1));
12481 MI.removeOperand(1);
12482 }
12483
12484 // Restore the ties
12485 for (unsigned i = MI.getNumOperands(); i--;) {
12486 const MachineOperand& op = MI.getOperand(i);
12487 if (op.isReg() && op.isUse()) {
12488 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12489 if (DefIdx != -1)
12490 MI.tieOperands(DefIdx, i);
12491 }
12492 }
12493
12494 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12495 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12496 ccOutIdx = 1;
12497 } else
12498 ccOutIdx = MCID->getNumOperands() - 1;
12499 } else
12500 ccOutIdx = MCID->getNumOperands() - 1;
12501
12502 // Any ARM instruction that sets the 's' bit should specify an optional
12503 // "cc_out" operand in the last operand position.
12504 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12505 assert(!NewOpc && "Optional cc_out operand required");
12506 return;
12507 }
12508 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12509 // since we already have an optional CPSR def.
12510 bool definesCPSR = false;
12511 bool deadCPSR = false;
12512 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12513 ++i) {
12514 const MachineOperand &MO = MI.getOperand(i);
12515 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12516 definesCPSR = true;
12517 if (MO.isDead())
12518 deadCPSR = true;
12519 MI.removeOperand(i);
12520 break;
12521 }
12522 }
12523 if (!definesCPSR) {
12524 assert(!NewOpc && "Optional cc_out operand required");
12525 return;
12526 }
12527 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12528 if (deadCPSR) {
12529 assert(!MI.getOperand(ccOutIdx).getReg() &&
12530 "expect uninitialized optional cc_out operand");
12531 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12532 if (!Subtarget->isThumb1Only())
12533 return;
12534 }
12535
12536 // If this instruction was defined with an optional CPSR def and its dag node
12537 // had a live implicit CPSR def, then activate the optional CPSR def.
12538 MachineOperand &MO = MI.getOperand(ccOutIdx);
12539 MO.setReg(ARM::CPSR);
12540 MO.setIsDef(true);
12541}
12542
12543//===----------------------------------------------------------------------===//
12544// ARM Optimization Hooks
12545//===----------------------------------------------------------------------===//
12546
12547// Helper function that checks if N is a null or all ones constant.
12548static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12550}
12551
12552// Return true if N is conditionally 0 or all ones.
12553// Detects these expressions where cc is an i1 value:
12554//
12555// (select cc 0, y) [AllOnes=0]
12556// (select cc y, 0) [AllOnes=0]
12557// (zext cc) [AllOnes=0]
12558// (sext cc) [AllOnes=0/1]
12559// (select cc -1, y) [AllOnes=1]
12560// (select cc y, -1) [AllOnes=1]
12561//
12562// Invert is set when N is the null/all ones constant when CC is false.
12563// OtherOp is set to the alternative value of N.
12564 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12565 SDValue &CC, bool &Invert,
12566 SDValue &OtherOp,
12567 SelectionDAG &DAG) {
12568 switch (N->getOpcode()) {
12569 default: return false;
12570 case ISD::SELECT: {
12571 CC = N->getOperand(0);
12572 SDValue N1 = N->getOperand(1);
12573 SDValue N2 = N->getOperand(2);
12574 if (isZeroOrAllOnes(N1, AllOnes)) {
12575 Invert = false;
12576 OtherOp = N2;
12577 return true;
12578 }
12579 if (isZeroOrAllOnes(N2, AllOnes)) {
12580 Invert = true;
12581 OtherOp = N1;
12582 return true;
12583 }
12584 return false;
12585 }
12586 case ISD::ZERO_EXTEND:
12587 // (zext cc) can never be the all ones value.
12588 if (AllOnes)
12589 return false;
12590 [[fallthrough]];
12591 case ISD::SIGN_EXTEND: {
12592 SDLoc dl(N);
12593 EVT VT = N->getValueType(0);
12594 CC = N->getOperand(0);
12595 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12596 return false;
12597 Invert = !AllOnes;
12598 if (AllOnes)
12599 // When looking for an AllOnes constant, N is an sext, and the 'other'
12600 // value is 0.
12601 OtherOp = DAG.getConstant(0, dl, VT);
12602 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12603 // When looking for a 0 constant, N can be zext or sext.
12604 OtherOp = DAG.getConstant(1, dl, VT);
12605 else
12606 OtherOp = DAG.getAllOnesConstant(dl, VT);
12607 return true;
12608 }
12609 }
12610}
12611
12612// Combine a constant select operand into its use:
12613//
12614// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12615// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12616// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12617// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12618// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12619//
12620// The transform is rejected if the select doesn't have a constant operand that
12621// is null, or all ones when AllOnes is set.
12622//
12623// Also recognize sext/zext from i1:
12624//
12625// (add (zext cc), x) -> (select cc (add x, 1), x)
12626// (add (sext cc), x) -> (select cc (add x, -1), x)
12627//
12628// These transformations eventually create predicated instructions.
12629//
12630// @param N The node to transform.
12631// @param Slct The N operand that is a select.
12632// @param OtherOp The other N operand (x above).
12633// @param DCI Context.
12634// @param AllOnes Require the select constant to be all ones instead of null.
12635// @returns The new node, or SDValue() on failure.
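//
// As an IR-level illustration (example values, not taken from a test):
//   %s = select i1 %cc, i32 0, i32 7
//   %r = add i32 %s, %x
// becomes
//   %a = add i32 %x, 7
//   %r = select i1 %cc, i32 %x, i32 %a
// which can later be emitted as a conditionally executed add.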
12636static
12637 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12638 TargetLowering::DAGCombinerInfo &DCI,
12639 bool AllOnes = false) {
12640 SelectionDAG &DAG = DCI.DAG;
12641 EVT VT = N->getValueType(0);
12642 SDValue NonConstantVal;
12643 SDValue CCOp;
12644 bool SwapSelectOps;
12645 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12646 NonConstantVal, DAG))
12647 return SDValue();
12648
12649 // Slct is now known to be the desired identity constant when CC is true.
12650 SDValue TrueVal = OtherOp;
12651 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12652 OtherOp, NonConstantVal);
12653 // Unless SwapSelectOps says CC should be false.
12654 if (SwapSelectOps)
12655 std::swap(TrueVal, FalseVal);
12656
12657 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12658 CCOp, TrueVal, FalseVal);
12659}
12660
12661// Attempt combineSelectAndUse on each operand of a commutative operator N.
12662static
12663 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12664 TargetLowering::DAGCombinerInfo &DCI) {
12665 SDValue N0 = N->getOperand(0);
12666 SDValue N1 = N->getOperand(1);
12667 if (N0.getNode()->hasOneUse())
12668 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12669 return Result;
12670 if (N1.getNode()->hasOneUse())
12671 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12672 return Result;
12673 return SDValue();
12674}
12675
12676 static bool IsVUZPShuffleNode(SDNode *N) {
12677 // VUZP shuffle node.
12678 if (N->getOpcode() == ARMISD::VUZP)
12679 return true;
12680
12681 // "VUZP" on i32 is an alias for VTRN.
12682 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12683 return true;
12684
12685 return false;
12686}
12687
12688 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12689 TargetLowering::DAGCombinerInfo &DCI,
12690 const ARMSubtarget *Subtarget) {
12691 // Look for ADD(VUZP.0, VUZP.1).
12692 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12693 N0 == N1)
12694 return SDValue();
12695
12696 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12697 if (!N->getValueType(0).is64BitVector())
12698 return SDValue();
12699
12700 // Generate vpadd.
12701 SelectionDAG &DAG = DCI.DAG;
12702 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12703 SDLoc dl(N);
12704 SDNode *Unzip = N0.getNode();
12705 EVT VT = N->getValueType(0);
12706
12707 SmallVector<SDValue, 8> Ops;
12708 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12709 TLI.getPointerTy(DAG.getDataLayout())));
12710 Ops.push_back(Unzip->getOperand(0));
12711 Ops.push_back(Unzip->getOperand(1));
12712
12713 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12714}
12715
12716 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12717 TargetLowering::DAGCombinerInfo &DCI,
12718 const ARMSubtarget *Subtarget) {
12719 // Check for two extended operands.
12720 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12721 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12722 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12723 N1.getOpcode() == ISD::ZERO_EXTEND))
12724 return SDValue();
12725
12726 SDValue N00 = N0.getOperand(0);
12727 SDValue N10 = N1.getOperand(0);
12728
12729 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12730 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12731 N00 == N10)
12732 return SDValue();
12733
12734 // We only recognize Q register paddl here; this can't be reached until
12735 // after type legalization.
12736 if (!N00.getValueType().is64BitVector() ||
12737 !N0.getValueType().is128BitVector())
12738 return SDValue();
12739
12740 // Generate vpaddl.
12741 SelectionDAG &DAG = DCI.DAG;
12742 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12743 SDLoc dl(N);
12744 EVT VT = N->getValueType(0);
12745
12746 SmallVector<SDValue, 8> Ops;
12747 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12748 unsigned Opcode;
12749 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12750 Opcode = Intrinsic::arm_neon_vpaddls;
12751 else
12752 Opcode = Intrinsic::arm_neon_vpaddlu;
12753 Ops.push_back(DAG.getConstant(Opcode, dl,
12754 TLI.getPointerTy(DAG.getDataLayout())));
12755 EVT ElemTy = N00.getValueType().getVectorElementType();
12756 unsigned NumElts = VT.getVectorNumElements();
12757 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12758 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12759 N00.getOperand(0), N00.getOperand(1));
12760 Ops.push_back(Concat);
12761
12762 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12763}
12764
12765// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12766// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12767// much easier to match.
12768static SDValue
12769 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12770 TargetLowering::DAGCombinerInfo &DCI,
12771 const ARMSubtarget *Subtarget) {
12772 // Only perform this optimization after legalization and when NEON is
12773 // available. We also expect both operands to be BUILD_VECTORs.
12774 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12775 || N0.getOpcode() != ISD::BUILD_VECTOR
12776 || N1.getOpcode() != ISD::BUILD_VECTOR)
12777 return SDValue();
12778
12779 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12780 EVT VT = N->getValueType(0);
12781 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12782 return SDValue();
12783
12784 // Check that the vector operands are of the right form.
12785 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12786 // operands, where N is the size of the formed vector.
12787 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12788 // index such that we have a pairwise add pattern.
12789
12790 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12791 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12792 return SDValue();
12793 SDValue Vec = N0->getOperand(0)->getOperand(0);
12794 SDNode *V = Vec.getNode();
12795 unsigned nextIndex = 0;
12796
12797 // For each operand of the ADD that is a BUILD_VECTOR,
12798 // check to see if each of their operands are an EXTRACT_VECTOR with
12799 // the same vector and appropriate index.
12800 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12801 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12802 N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12803
12804 SDValue ExtVec0 = N0->getOperand(i);
12805 SDValue ExtVec1 = N1->getOperand(i);
12806
12807 // First operand is the vector; verify it's the same.
12808 if (V != ExtVec0->getOperand(0).getNode() ||
12809 V != ExtVec1->getOperand(0).getNode())
12810 return SDValue();
12811
12812 // Second is the constant; verify it's correct.
12813 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12814 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12815
12816 // For the constant, we want to see all the even or all the odd.
12817 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12818 || C1->getZExtValue() != nextIndex+1)
12819 return SDValue();
12820
12821 // Increment index.
12822 nextIndex+=2;
12823 } else
12824 return SDValue();
12825 }
12826
12827 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12828 // we're using the entire input vector, otherwise there's a size/legality
12829 // mismatch somewhere.
12830 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12831 Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
12832 return SDValue();
12833
12834 // Create VPADDL node.
12835 SelectionDAG &DAG = DCI.DAG;
12836 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12837
12838 SDLoc dl(N);
12839
12840 // Build operand list.
12841 SmallVector<SDValue, 8> Ops;
12842 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12843 TLI.getPointerTy(DAG.getDataLayout())));
12844
12845 // Input is the vector.
12846 Ops.push_back(Vec);
12847
12848 // Get widened type and narrowed type.
12849 MVT widenType;
12850 unsigned numElem = VT.getVectorNumElements();
12851
12852 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12853 switch (inputLaneType.getSimpleVT().SimpleTy) {
12854 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12855 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12856 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12857 default:
12858 llvm_unreachable("Invalid vector element type for padd optimization.");
12859 }
12860
12861 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12862 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12863 return DAG.getNode(ExtOp, dl, VT, tmp);
12864}
12865
12866 static SDValue findMUL_LOHI(SDValue V) {
12867 if (V->getOpcode() == ISD::UMUL_LOHI ||
12868 V->getOpcode() == ISD::SMUL_LOHI)
12869 return V;
12870 return SDValue();
12871}
12872
12873static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12874 TargetLowering::DAGCombinerInfo &DCI,
12875 const ARMSubtarget *Subtarget) {
12876 if (!Subtarget->hasBaseDSP())
12877 return SDValue();
12878
12879 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12880 // accumulates the product into a 64-bit value. The 16-bit values will
12881 // be sign extended somehow or SRA'd into 32-bit values
12882 // (addc (adde (mul 16bit, 16bit), lo), hi)
12883 SDValue Mul = AddcNode->getOperand(0);
12884 SDValue Lo = AddcNode->getOperand(1);
12885 if (Mul.getOpcode() != ISD::MUL) {
12886 Lo = AddcNode->getOperand(0);
12887 Mul = AddcNode->getOperand(1);
12888 if (Mul.getOpcode() != ISD::MUL)
12889 return SDValue();
12890 }
12891
12892 SDValue SRA = AddeNode->getOperand(0);
12893 SDValue Hi = AddeNode->getOperand(1);
12894 if (SRA.getOpcode() != ISD::SRA) {
12895 SRA = AddeNode->getOperand(1);
12896 Hi = AddeNode->getOperand(0);
12897 if (SRA.getOpcode() != ISD::SRA)
12898 return SDValue();
12899 }
12900 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12901 if (Const->getZExtValue() != 31)
12902 return SDValue();
12903 } else
12904 return SDValue();
12905
12906 if (SRA.getOperand(0) != Mul)
12907 return SDValue();
12908
12909 SelectionDAG &DAG = DCI.DAG;
12910 SDLoc dl(AddcNode);
12911 unsigned Opcode = 0;
12912 SDValue Op0;
12913 SDValue Op1;
12914
12915 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12916 Opcode = ARMISD::SMLALBB;
12917 Op0 = Mul.getOperand(0);
12918 Op1 = Mul.getOperand(1);
12919 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12920 Opcode = ARMISD::SMLALBT;
12921 Op0 = Mul.getOperand(0);
12922 Op1 = Mul.getOperand(1).getOperand(0);
12923 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12924 Opcode = ARMISD::SMLALTB;
12925 Op0 = Mul.getOperand(0).getOperand(0);
12926 Op1 = Mul.getOperand(1);
12927 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12928 Opcode = ARMISD::SMLALTT;
12929 Op0 = Mul->getOperand(0).getOperand(0);
12930 Op1 = Mul->getOperand(1).getOperand(0);
12931 }
12932
12933 if (!Op0 || !Op1)
12934 return SDValue();
12935
12936 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12937 Op0, Op1, Lo, Hi);
12938 // Replace the ADD nodes' uses with the SMLAL node's values.
12939 SDValue HiMLALResult(SMLAL.getNode(), 1);
12940 SDValue LoMLALResult(SMLAL.getNode(), 0);
12941
12942 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12943 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12944
12945 // Return original node to notify the driver to stop replacing.
12946 SDValue resNode(AddcNode, 0);
12947 return resNode;
12948}
12949
12950 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12951 TargetLowering::DAGCombinerInfo &DCI,
12952 const ARMSubtarget *Subtarget) {
12953 // Look for multiply add opportunities.
12954 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12955 // each add node consumes a value from ISD::UMUL_LOHI and there is
12956 // a glue link from the first add to the second add.
12957 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12958 // a S/UMLAL instruction.
12959 // UMUL_LOHI
12960 // / :lo \ :hi
12961 // V \ [no multiline comment]
12962 // loAdd -> ADDC |
12963 // \ :carry /
12964 // V V
12965 // ADDE <- hiAdd
12966 //
12967 // In the special case where only the higher part of a signed result is used
12968 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12969 // a constant with the exact value of 0x80000000, we recognize we are dealing
12970 // with a "rounded multiply and add" (or subtract) and transform it into
12971 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
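//
// In C terms (purely illustrative), the basic shape being matched is a 64-bit
// accumulation of a widening multiply such as
//   acc += (uint64_t)a * b;   // unsigned 32-bit operands -> UMLAL
//   acc += (int64_t)a * b;    // signed 32-bit operands   -> SMLAL
// which legalization splits into a xMUL_LOHI feeding an ADDC/ADDE pair.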
12972
12973 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12974 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12975 "Expect an ADDE or SUBE");
12976
12977 assert(AddeSubeNode->getNumOperands() == 3 &&
12978 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12979 "ADDE node has the wrong inputs");
12980
12981 // Check that we are chained to the right ADDC or SUBC node.
12982 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12983 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12984 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12985 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12986 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12987 return SDValue();
12988
12989 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12990 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12991
12992 // Check if the two operands are from the same mul_lohi node.
12993 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12994 return SDValue();
12995
12996 assert(AddcSubcNode->getNumValues() == 2 &&
12997 AddcSubcNode->getValueType(0) == MVT::i32 &&
12998 "Expect ADDC with two result values. First: i32");
12999
13000 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
13001 // may be an SMLAL which multiplies two 16-bit values.
13002 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
13003 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
13004 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
13005 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
13006 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
13007 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
13008
13009 // Check for the triangle shape.
13010 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
13011 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
13012
13013 // Make sure that the ADDE/SUBE operands are not coming from the same node.
13014 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
13015 return SDValue();
13016
13017 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
13018 bool IsLeftOperandMUL = false;
13019 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
13020 if (MULOp == SDValue())
13021 MULOp = findMUL_LOHI(AddeSubeOp1);
13022 else
13023 IsLeftOperandMUL = true;
13024 if (MULOp == SDValue())
13025 return SDValue();
13026
13027 // Figure out the right opcode.
13028 unsigned Opc = MULOp->getOpcode();
13029 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
13030
13031 // Figure out the high and low input values to the MLAL node.
13032 SDValue *HiAddSub = nullptr;
13033 SDValue *LoMul = nullptr;
13034 SDValue *LowAddSub = nullptr;
13035
13036 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
13037 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
13038 return SDValue();
13039
13040 if (IsLeftOperandMUL)
13041 HiAddSub = &AddeSubeOp1;
13042 else
13043 HiAddSub = &AddeSubeOp0;
13044
13045 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13046 // whose low result is fed to the ADDC/SUBC we are checking.
13047
13048 if (AddcSubcOp0 == MULOp.getValue(0)) {
13049 LoMul = &AddcSubcOp0;
13050 LowAddSub = &AddcSubcOp1;
13051 }
13052 if (AddcSubcOp1 == MULOp.getValue(0)) {
13053 LoMul = &AddcSubcOp1;
13054 LowAddSub = &AddcSubcOp0;
13055 }
13056
13057 if (!LoMul)
13058 return SDValue();
13059
13060 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13061 // the replacement below will create a cycle.
13062 if (AddcSubcNode == HiAddSub->getNode() ||
13063 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13064 return SDValue();
13065
13066 // Create the merged node.
13067 SelectionDAG &DAG = DCI.DAG;
13068
13069 // Start building operand list.
13070 SmallVector<SDValue, 8> Ops;
13071 Ops.push_back(LoMul->getOperand(0));
13072 Ops.push_back(LoMul->getOperand(1));
13073
13074 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13075 // the case, we must be doing signed multiplication and only use the higher
13076 // part of the result of the MLAL; furthermore, the LowAddSub must be a
13077 // constant addition or subtraction with the value of 0x80000000.
13078 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13079 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13080 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13081 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13082 0x80000000) {
13083 Ops.push_back(*HiAddSub);
13084 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13085 FinalOpc = ARMISD::SMMLSR;
13086 } else {
13087 FinalOpc = ARMISD::SMMLAR;
13088 }
13089 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13090 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13091
13092 return SDValue(AddeSubeNode, 0);
13093 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13094 // SMMLS is generated during instruction selection and the rest of this
13095 // function can not handle the case where AddcSubcNode is a SUBC.
13096 return SDValue();
13097
13098 // Finish building the operand list for {U/S}MLAL
13099 Ops.push_back(*LowAddSub);
13100 Ops.push_back(*HiAddSub);
13101
13102 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13103 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13104
13105 // Replace the ADDC/ADDE nodes' uses with the MLAL node's values.
13106 SDValue HiMLALResult(MLALNode.getNode(), 1);
13107 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13108
13109 SDValue LoMLALResult(MLALNode.getNode(), 0);
13110 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13111
13112 // Return original node to notify the driver to stop replacing.
13113 return SDValue(AddeSubeNode, 0);
13114}
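// Illustrative sketch, not part of the original file: the ADDC/ADDE pair fed by
// an [SU]MUL_LOHI that the combine above recognises is a 64-bit multiply-
// accumulate, which SMLAL/UMLAL compute in one instruction. Hypothetical helper:
static inline long long smlal_reference(long long Acc, int A, int B) {
  // The low/high halves of the result correspond to SMLAL's RdLo/RdHi outputs.
  return Acc + (long long)A * (long long)B;
}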
13115
13116static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13117 TargetLowering::DAGCombinerInfo &DCI,
13118 const ARMSubtarget *Subtarget) {
13119 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13120 // While trying to combine for the other MLAL nodes, first search for the
13121 // chance to use UMAAL. Check if Addc uses a node which has already
13122 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13123 // as the addend, and it's handled in PerformUMLALCombine.
13124
13125 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13126 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13127
13128 // Check that we have a glued ADDC node.
13129 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13130 if (AddcNode->getOpcode() != ARMISD::ADDC)
13131 return SDValue();
13132
13133 // Find the converted UMAAL or quit if it doesn't exist.
13134 SDNode *UmlalNode = nullptr;
13135 SDValue AddHi;
13136 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13137 UmlalNode = AddcNode->getOperand(0).getNode();
13138 AddHi = AddcNode->getOperand(1);
13139 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13140 UmlalNode = AddcNode->getOperand(1).getNode();
13141 AddHi = AddcNode->getOperand(0);
13142 } else {
13143 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13144 }
13145
13146 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13147 // the ADDC as well as Zero.
13148 if (!isNullConstant(UmlalNode->getOperand(3)))
13149 return SDValue();
13150
13151 if ((isNullConstant(AddeNode->getOperand(0)) &&
13152 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13153 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13154 isNullConstant(AddeNode->getOperand(1)))) {
13155 SelectionDAG &DAG = DCI.DAG;
13156 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13157 UmlalNode->getOperand(2), AddHi };
13158 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13159 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13160
13161 // Replace the ADDC/ADDE nodes' uses with the UMAAL node's values.
13162 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13163 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13164
13165 // Return original node to notify the driver to stop replacing.
13166 return SDValue(AddeNode, 0);
13167 }
13168 return SDValue();
13169}
13170
13171static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13172 const ARMSubtarget *Subtarget) {
13173 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13174 return SDValue();
13175
13176 // Check that we have a pair of ADDC and ADDE as operands.
13177 // Both addends of the ADDE must be zero.
13178 SDNode* AddcNode = N->getOperand(2).getNode();
13179 SDNode* AddeNode = N->getOperand(3).getNode();
13180 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13181 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13182 isNullConstant(AddeNode->getOperand(0)) &&
13183 isNullConstant(AddeNode->getOperand(1)) &&
13184 (AddeNode->getOperand(2).getNode() == AddcNode))
13185 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13186 DAG.getVTList(MVT::i32, MVT::i32),
13187 {N->getOperand(0), N->getOperand(1),
13188 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13189 else
13190 return SDValue();
13191}
13192
13193static SDValue PerformAddcSubcCombine(SDNode *N,
13194 TargetLowering::DAGCombinerInfo &DCI,
13195 const ARMSubtarget *Subtarget) {
13196 SelectionDAG &DAG(DCI.DAG);
13197
13198 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13199 // (SUBC (ADDE 0, 0, C), 1) -> C
13200 SDValue LHS = N->getOperand(0);
13201 SDValue RHS = N->getOperand(1);
13202 if (LHS->getOpcode() == ARMISD::ADDE &&
13203 isNullConstant(LHS->getOperand(0)) &&
13204 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13205 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13206 }
13207 }
13208
13209 if (Subtarget->isThumb1Only()) {
13210 SDValue RHS = N->getOperand(1);
13211 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13212 int32_t imm = C->getSExtValue();
13213 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13214 SDLoc DL(N);
13215 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13216 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13217 : ARMISD::ADDC;
13218 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13219 }
13220 }
13221 }
13222
13223 return SDValue();
13224}
13225
13226static SDValue PerformAddeSubeCombine(SDNode *N,
13227 TargetLowering::DAGCombinerInfo &DCI,
13228 const ARMSubtarget *Subtarget) {
13229 if (Subtarget->isThumb1Only()) {
13230 SelectionDAG &DAG = DCI.DAG;
13231 SDValue RHS = N->getOperand(1);
13232 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13233 int64_t imm = C->getSExtValue();
13234 if (imm < 0) {
13235 SDLoc DL(N);
13236
13237 // The with-carry-in form matches bitwise not instead of the negation.
13238 // Effectively, the inverse interpretation of the carry flag already
13239 // accounts for part of the negation.
13240 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13241
13242 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13243 : ARMISD::ADDE;
13244 return DAG.getNode(Opcode, DL, N->getVTList(),
13245 N->getOperand(0), RHS, N->getOperand(2));
13246 }
13247 }
13248 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13249 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13250 }
13251 return SDValue();
13252}
13253
13254static SDValue PerformSELECTCombine(SDNode *N,
13255 TargetLowering::DAGCombinerInfo &DCI,
13256 const ARMSubtarget *Subtarget) {
13257 if (!Subtarget->hasMVEIntegerOps())
13258 return SDValue();
13259
13260 SDLoc dl(N);
13261 SDValue SetCC;
13262 SDValue LHS;
13263 SDValue RHS;
13264 ISD::CondCode CC;
13265 SDValue TrueVal;
13266 SDValue FalseVal;
13267
13268 if (N->getOpcode() == ISD::SELECT &&
13269 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13270 SetCC = N->getOperand(0);
13271 LHS = SetCC->getOperand(0);
13272 RHS = SetCC->getOperand(1);
13273 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13274 TrueVal = N->getOperand(1);
13275 FalseVal = N->getOperand(2);
13276 } else if (N->getOpcode() == ISD::SELECT_CC) {
13277 LHS = N->getOperand(0);
13278 RHS = N->getOperand(1);
13279 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13280 TrueVal = N->getOperand(2);
13281 FalseVal = N->getOperand(3);
13282 } else {
13283 return SDValue();
13284 }
13285
13286 unsigned int Opcode = 0;
13287 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13288 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13289 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13290 Opcode = ARMISD::VMINVu;
13291 if (CC == ISD::SETUGT)
13292 std::swap(TrueVal, FalseVal);
13293 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13294 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13295 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13296 Opcode = ARMISD::VMINVs;
13297 if (CC == ISD::SETGT)
13298 std::swap(TrueVal, FalseVal);
13299 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13300 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13301 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13302 Opcode = ARMISD::VMAXVu;
13303 if (CC == ISD::SETULT)
13304 std::swap(TrueVal, FalseVal);
13305 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13306 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13307 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13308 Opcode = ARMISD::VMAXVs;
13309 if (CC == ISD::SETLT)
13310 std::swap(TrueVal, FalseVal);
13311 } else
13312 return SDValue();
13313
13314 // Normalise to the right hand side being the vector reduction
13315 switch (TrueVal->getOpcode()) {
13316 case ISD::VECREDUCE_UMIN:
13317 case ISD::VECREDUCE_SMIN:
13318 case ISD::VECREDUCE_UMAX:
13319 case ISD::VECREDUCE_SMAX:
13320 std::swap(LHS, RHS);
13321 std::swap(TrueVal, FalseVal);
13322 break;
13323 }
13324
13325 EVT VectorType = FalseVal->getOperand(0).getValueType();
13326
13327 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13328 VectorType != MVT::v4i32)
13329 return SDValue();
13330
13331 EVT VectorScalarType = VectorType.getVectorElementType();
13332
13333 // The values being selected must also be the ones being compared
13334 if (TrueVal != LHS || FalseVal != RHS)
13335 return SDValue();
13336
13337 EVT LeftType = LHS->getValueType(0);
13338 EVT RightType = RHS->getValueType(0);
13339
13340 // The types must match the reduced type too
13341 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13342 return SDValue();
13343
13344 // Legalise the scalar to an i32
13345 if (VectorScalarType != MVT::i32)
13346 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13347
13348 // Generate the reduction as an i32 for legalisation purposes
13349 auto Reduction =
13350 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13351
13352 // The result isn't actually an i32 so truncate it back to its original type
13353 if (VectorScalarType != MVT::i32)
13354 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13355
13356 return Reduction;
13357}
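// Illustrative sketch, not part of the original file: the select/setcc shape
// matched above, e.g. (x < umin(v)) ? x : umin(v), is just the minimum of x and
// every lane of v, which a single VMINV computes. Hypothetical helper:
static inline unsigned char vminv_u8_reference(unsigned char X,
                                               const unsigned char V[16]) {
  unsigned char Min = X;
  for (int I = 0; I < 16; ++I)
    if (V[I] < Min)
      Min = V[I];
  return Min; // == (X < umin(V)) ? X : umin(V)
}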
13358
13359// A special combine for the vqdmulh family of instructions. This is one of the
13360// potential set of patterns that could match this instruction. The base pattern
13361// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13362// This matches the slightly different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13363// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13364// the max is unnecessary.
13365static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13366 EVT VT = N->getValueType(0);
13367 SDValue Shft;
13368 ConstantSDNode *Clamp;
13369
13370 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13371 return SDValue();
13372
13373 if (N->getOpcode() == ISD::SMIN) {
13374 Shft = N->getOperand(0);
13375 Clamp = isConstOrConstSplat(N->getOperand(1));
13376 } else if (N->getOpcode() == ISD::VSELECT) {
13377 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13378 SDValue Cmp = N->getOperand(0);
13379 if (Cmp.getOpcode() != ISD::SETCC ||
13380 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13381 Cmp.getOperand(0) != N->getOperand(1) ||
13382 Cmp.getOperand(1) != N->getOperand(2))
13383 return SDValue();
13384 Shft = N->getOperand(1);
13385 Clamp = isConstOrConstSplat(N->getOperand(2));
13386 } else
13387 return SDValue();
13388
13389 if (!Clamp)
13390 return SDValue();
13391
13392 MVT ScalarType;
13393 int ShftAmt = 0;
13394 switch (Clamp->getSExtValue()) {
13395 case (1 << 7) - 1:
13396 ScalarType = MVT::i8;
13397 ShftAmt = 7;
13398 break;
13399 case (1 << 15) - 1:
13400 ScalarType = MVT::i16;
13401 ShftAmt = 15;
13402 break;
13403 case (1ULL << 31) - 1:
13404 ScalarType = MVT::i32;
13405 ShftAmt = 31;
13406 break;
13407 default:
13408 return SDValue();
13409 }
13410
13411 if (Shft.getOpcode() != ISD::SRA)
13412 return SDValue();
13413 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13414 if (!N1 || N1->getSExtValue() != ShftAmt)
13415 return SDValue();
13416
13417 SDValue Mul = Shft.getOperand(0);
13418 if (Mul.getOpcode() != ISD::MUL)
13419 return SDValue();
13420
13421 SDValue Ext0 = Mul.getOperand(0);
13422 SDValue Ext1 = Mul.getOperand(1);
13423 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13424 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13425 return SDValue();
13426 EVT VecVT = Ext0.getOperand(0).getValueType();
13427 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13428 return SDValue();
13429 if (Ext1.getOperand(0).getValueType() != VecVT ||
13430 VecVT.getScalarType() != ScalarType ||
13431 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13432 return SDValue();
13433
13434 SDLoc DL(Mul);
13435 unsigned LegalLanes = 128 / (ShftAmt + 1);
13436 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13437 // For types smaller than legal vectors extend to be legal and only use needed
13438 // lanes.
13439 if (VecVT.getSizeInBits() < 128) {
13440 EVT ExtVecVT =
13441 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13442 VecVT.getVectorNumElements());
13443 SDValue Inp0 =
13444 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13445 SDValue Inp1 =
13446 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13447 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13448 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13449 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13450 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13451 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13452 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13453 }
13454
13455 // For larger types, split into legal sized chunks.
13456 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13457 unsigned NumParts = VecVT.getSizeInBits() / 128;
13458 SmallVector<SDValue> Parts;
13459 for (unsigned I = 0; I < NumParts; ++I) {
13460 SDValue Inp0 =
13461 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13462 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13463 SDValue Inp1 =
13464 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13465 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13466 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13467 Parts.push_back(VQDMULH);
13468 }
13469 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13470 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13471}
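// Illustrative sketch, not part of the original file: a per-lane scalar
// reference for the i16 form of the pattern above. smin((a * b) >> 15, 32767)
// on sign-extended operands agrees with VQDMULH's saturating doubling
// multiply-high for every pair of i16 inputs, including a == b == -32768 where
// both sides give 32767. Hypothetical helper:
static inline short vqdmulh_s16_reference(short A, short B) {
  int P = ((int)A * (int)B) >> 15; // sext, mul, ashr 15 as matched above
  return (short)(P > 32767 ? 32767 : P);
}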
13472
13473static SDValue PerformVSELECTCombine(SDNode *N,
13474 TargetLowering::DAGCombinerInfo &DCI,
13475 const ARMSubtarget *Subtarget) {
13476 if (!Subtarget->hasMVEIntegerOps())
13477 return SDValue();
13478
13479 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13480 return V;
13481
13482 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13483 //
13484 // We need to re-implement this optimization here as the implementation in the
13485 // Target-Independent DAGCombiner does not handle the kind of constant we make
13486 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13487 // good reason, allowing truncation there would break other targets).
13488 //
13489 // Currently, this is only done for MVE, as it's the only target that benefits
13490 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13491 if (N->getOperand(0).getOpcode() != ISD::XOR)
13492 return SDValue();
13493 SDValue XOR = N->getOperand(0);
13494
13495 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13496 // It is important to check with truncation allowed as the BUILD_VECTORs we
13497 // generate in those situations will truncate their operands.
13498 ConstantSDNode *Const =
13499 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13500 /*AllowTruncation*/ true);
13501 if (!Const || !Const->isOne())
13502 return SDValue();
13503
13504 // Rewrite into vselect(cond, rhs, lhs).
13505 SDValue Cond = XOR->getOperand(0);
13506 SDValue LHS = N->getOperand(1);
13507 SDValue RHS = N->getOperand(2);
13508 EVT Type = N->getValueType(0);
13509 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13510}
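// Illustrative note, not part of the original file: with an all-ones/all-zeros
// boolean condition, vselect(xor(C, 1), L, R) selects L exactly when C is
// false, which is the same as vselect(C, R, L); the rewrite simply drops the
// VPNOT and swaps the two selected operands.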
13511
13512// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13513static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13514 TargetLowering::DAGCombinerInfo &DCI,
13515 const ARMSubtarget *Subtarget) {
13516 SDValue Op0 = N->getOperand(0);
13517 SDValue Op1 = N->getOperand(1);
13518 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13519 EVT VT = N->getValueType(0);
13520
13521 if (!Subtarget->hasMVEIntegerOps() ||
13522 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13523 return SDValue();
13524
13525 if (CC == ISD::SETUGE) {
13526 std::swap(Op0, Op1);
13527 CC = ISD::SETULT;
13528 }
13529
13530 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13531 Op0.getOpcode() != ISD::BUILD_VECTOR)
13532 return SDValue();
13533
13534 // Check first operand is BuildVector of 0,1,2,...
13535 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13536 if (!Op0.getOperand(I).isUndef() &&
13537 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13538 Op0.getConstantOperandVal(I) == I))
13539 return SDValue();
13540 }
13541
13542 // The second is a Splat of Op1S
13543 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13544 if (!Op1S)
13545 return SDValue();
13546
13547 unsigned Opc;
13548 switch (VT.getVectorNumElements()) {
13549 case 2:
13550 Opc = Intrinsic::arm_mve_vctp64;
13551 break;
13552 case 4:
13553 Opc = Intrinsic::arm_mve_vctp32;
13554 break;
13555 case 8:
13556 Opc = Intrinsic::arm_mve_vctp16;
13557 break;
13558 case 16:
13559 Opc = Intrinsic::arm_mve_vctp8;
13560 break;
13561 default:
13562 return SDValue();
13563 }
13564
13565 SDLoc DL(N);
13566 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13567 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13568 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13569}
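// Illustrative sketch, not part of the original file: lane I of
// setcc([0,1,2,...], splat(N), ult) is simply (I < N), i.e. the "first N lanes
// active" predicate that VCTP produces directly. Hypothetical helper:
static inline bool vctp_lane_reference(unsigned Lane, unsigned N) {
  return Lane < N;
}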
13570
13571/// PerformADDECombine - Target-specific dag combine transform from
13572/// ARMISD::ADDC, ARMISD::ADDE, and ISD::[SU]MUL_LOHI to SMLAL/UMLAL, or
13573/// ARMISD::ADDC, ARMISD::ADDE, and ARMISD::UMLAL to ARMISD::UMAAL.
13574static SDValue PerformADDECombine(SDNode *N,
13575 TargetLowering::DAGCombinerInfo &DCI,
13576 const ARMSubtarget *Subtarget) {
13577 // Only ARM and Thumb2 support UMLAL/SMLAL.
13578 if (Subtarget->isThumb1Only())
13579 return PerformAddeSubeCombine(N, DCI, Subtarget);
13580
13581 // Only perform the checks after legalize when the pattern is available.
13582 if (DCI.isBeforeLegalize()) return SDValue();
13583
13584 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13585}
13586
13587/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13588/// operands N0 and N1. This is a helper for PerformADDCombine that is
13589/// called with the default operands, and if that fails, with commuted
13590/// operands.
13591static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13592 TargetLowering::DAGCombinerInfo &DCI,
13593 const ARMSubtarget *Subtarget){
13594 // Attempt to create vpadd for this add.
13595 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13596 return Result;
13597
13598 // Attempt to create vpaddl for this add.
13599 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13600 return Result;
13601 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13602 Subtarget))
13603 return Result;
13604
13605 // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
13606 if (N0.getNode()->hasOneUse())
13607 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13608 return Result;
13609 return SDValue();
13610}
13611
13612static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13613 EVT VT = N->getValueType(0);
13614 SDValue N0 = N->getOperand(0);
13615 SDValue N1 = N->getOperand(1);
13616 SDLoc dl(N);
13617
13618 auto IsVecReduce = [](SDValue Op) {
13619 switch (Op.getOpcode()) {
13620 case ISD::VECREDUCE_ADD:
13621 case ARMISD::VADDVs:
13622 case ARMISD::VADDVu:
13623 case ARMISD::VMLAVs:
13624 case ARMISD::VMLAVu:
13625 return true;
13626 }
13627 return false;
13628 };
13629
13630 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13631 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13632 // add(add(X, vecreduce(Y)), vecreduce(Z))
13633 // to make better use of vaddva style instructions.
13634 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13635 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13636 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13637 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13638 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13639 }
13640 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13641 // add(add(add(A, C), reduce(B)), reduce(D))
13642 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13643 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13644 unsigned N0RedOp = 0;
13645 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13646 N0RedOp = 1;
13647 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13648 return SDValue();
13649 }
13650
13651 unsigned N1RedOp = 0;
13652 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13653 N1RedOp = 1;
13654 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13655 return SDValue();
13656
13657 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13658 N1.getOperand(1 - N1RedOp));
13659 SDValue Add1 =
13660 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13661 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13662 }
13663 return SDValue();
13664 };
13665 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13666 return R;
13667 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13668 return R;
13669
13670 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13671 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13672 // by ascending load offsets. This can help cores prefetch if the order of
13673 // loads is more predictable.
13674 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13675 // Check if two reductions are known to load data where one is before/after
13676 // another. Return negative if N0 loads data before N1, positive if N1 is
13677 // before N0 and 0 otherwise if nothing is known.
13678 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13679 // Look through to the first operand of a MUL, for the VMLA case.
13680 // Currently only looks at the first operand, in the hope they are equal.
13681 if (N0.getOpcode() == ISD::MUL)
13682 N0 = N0.getOperand(0);
13683 if (N1.getOpcode() == ISD::MUL)
13684 N1 = N1.getOperand(0);
13685
13686 // Return true if the two operands are loads to the same object and the
13687 // offset of the first is known to be less than the offset of the second.
13688 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13689 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13690 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13691 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13692 Load1->isIndexed())
13693 return 0;
13694
13695 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13696 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13697
13698 if (!BaseLocDecomp0.getBase() ||
13699 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13700 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13701 return 0;
13702 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13703 return -1;
13704 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13705 return 1;
13706 return 0;
13707 };
13708
13709 SDValue X;
13710 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13711 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13712 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13713 N0.getOperand(1).getOperand(0));
13714 if (IsBefore < 0) {
13715 X = N0.getOperand(0);
13716 N0 = N0.getOperand(1);
13717 } else if (IsBefore > 0) {
13718 X = N0.getOperand(1);
13719 N0 = N0.getOperand(0);
13720 } else
13721 return SDValue();
13722 } else if (IsVecReduce(N0.getOperand(0))) {
13723 X = N0.getOperand(1);
13724 N0 = N0.getOperand(0);
13725 } else if (IsVecReduce(N0.getOperand(1))) {
13726 X = N0.getOperand(0);
13727 N0 = N0.getOperand(1);
13728 } else
13729 return SDValue();
13730 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13731 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13732 // Note this is backward to how you would expect. We create
13733 // add(reduce(load + 16), reduce(load + 0)) so that the
13734 // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
13735 // the X as VADDV(load + 0)
13736 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13737 } else
13738 return SDValue();
13739
13740 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13741 return SDValue();
13742
13743 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13744 return SDValue();
13745
13746 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13747 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13748 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13749 };
13750 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13751 return R;
13752 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13753 return R;
13754 return SDValue();
13755}
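// Illustrative note, not part of the original file: both rewrites above rely
// only on i32 addition being associative and commutative, e.g.
// X + (R1 + R2) == (X + R1) + R2, so that each vector reduction can later be
// folded into an accumulating VADDVA/VMLAVA form one at a time.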
13756
13757static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13758 const ARMSubtarget *Subtarget) {
13759 if (!Subtarget->hasMVEIntegerOps())
13760 return SDValue();
13761
13762 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13763 return R;
13764
13765 EVT VT = N->getValueType(0);
13766 SDValue N0 = N->getOperand(0);
13767 SDValue N1 = N->getOperand(1);
13768 SDLoc dl(N);
13769
13770 if (VT != MVT::i64)
13771 return SDValue();
13772
13773 // We are looking for an i64 add of a VADDLVx. Due to these being i64's, this
13774 // will look like:
13775 // t1: i32,i32 = ARMISD::VADDLVs x
13776 // t2: i64 = build_pair t1, t1:1
13777 // t3: i64 = add t2, y
13778 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13779 // the add to be simplified separately.
13780 // We also need to check for sext / zext and commutative adds.
13781 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13782 SDValue NB) {
13783 if (NB->getOpcode() != ISD::BUILD_PAIR)
13784 return SDValue();
13785 SDValue VecRed = NB->getOperand(0);
13786 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13787 VecRed.getResNo() != 0 ||
13788 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13789 return SDValue();
13790
13791 if (VecRed->getOpcode() == OpcodeA) {
13792 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13793 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13794 VecRed.getOperand(0), VecRed.getOperand(1));
13795 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13796 }
13797
13798 SmallVector<SDValue, 4> Ops(2);
13799 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13800
13801 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13802 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13803 Ops.push_back(VecRed->getOperand(I));
13804 SDValue Red =
13805 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13806 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13807 SDValue(Red.getNode(), 1));
13808 };
13809
13810 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13811 return M;
13812 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13813 return M;
13814 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13815 return M;
13816 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13817 return M;
13818 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13819 return M;
13820 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13821 return M;
13822 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13823 return M;
13824 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13825 return M;
13826 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13827 return M;
13828 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13829 return M;
13830 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13831 return M;
13832 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13833 return M;
13834 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13835 return M;
13836 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13837 return M;
13838 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13839 return M;
13840 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13841 return M;
13842 return SDValue();
13843}
13844
13845bool
13846ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13847 CombineLevel Level) const {
13848 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13849 N->getOpcode() == ISD::SRL) &&
13850 "Expected shift op");
13851
13852 SDValue ShiftLHS = N->getOperand(0);
13853 if (!ShiftLHS->hasOneUse())
13854 return false;
13855
13856 if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
13857 !ShiftLHS.getOperand(0)->hasOneUse())
13858 return false;
13859
13860 if (Level == BeforeLegalizeTypes)
13861 return true;
13862
13863 if (N->getOpcode() != ISD::SHL)
13864 return true;
13865
13866 if (Subtarget->isThumb1Only()) {
13867 // Avoid making expensive immediates by commuting shifts. (This logic
13868 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13869 // for free.)
13870 if (N->getOpcode() != ISD::SHL)
13871 return true;
13872 SDValue N1 = N->getOperand(0);
13873 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13874 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13875 return true;
13876 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13877 if (Const->getAPIntValue().ult(256))
13878 return false;
13879 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13880 Const->getAPIntValue().sgt(-256))
13881 return false;
13882 }
13883 return true;
13884 }
13885
13886 // Turn off commute-with-shift transform after legalization, so it doesn't
13887 // conflict with PerformSHLSimplify. (We could try to detect when
13888 // PerformSHLSimplify would trigger more precisely, but it isn't
13889 // really necessary.)
13890 return false;
13891}
13892
13893bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13894 const SDNode *N) const {
13895 assert(N->getOpcode() == ISD::XOR &&
13896 (N->getOperand(0).getOpcode() == ISD::SHL ||
13897 N->getOperand(0).getOpcode() == ISD::SRL) &&
13898 "Expected XOR(SHIFT) pattern");
13899
13900 // Only commute if the entire NOT mask is a hidden shifted mask.
13901 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13902 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13903 if (XorC && ShiftC) {
13904 unsigned MaskIdx, MaskLen;
13905 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13906 unsigned ShiftAmt = ShiftC->getZExtValue();
13907 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13908 if (N->getOperand(0).getOpcode() == ISD::SHL)
13909 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13910 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13911 }
13912 }
13913
13914 return false;
13915}
13916
13917bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13918 const SDNode *N, CombineLevel Level) const {
13919 assert(((N->getOpcode() == ISD::SHL &&
13920 N->getOperand(0).getOpcode() == ISD::SRL) ||
13921 (N->getOpcode() == ISD::SRL &&
13922 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13923 "Expected shift-shift mask");
13924
13925 if (!Subtarget->isThumb1Only())
13926 return true;
13927
13928 if (Level == BeforeLegalizeTypes)
13929 return true;
13930
13931 return false;
13932}
13933
13934bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13935 EVT VT) const {
13936 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13937}
13938
13939bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13940 if (!Subtarget->hasNEON()) {
13941 if (Subtarget->isThumb1Only())
13942 return VT.getScalarSizeInBits() <= 32;
13943 return true;
13944 }
13945 return VT.isScalarInteger();
13946}
13947
13948bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13949 EVT VT) const {
13950 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13951 return false;
13952
13953 switch (FPVT.getSimpleVT().SimpleTy) {
13954 case MVT::f16:
13955 return Subtarget->hasVFP2Base();
13956 case MVT::f32:
13957 return Subtarget->hasVFP2Base();
13958 case MVT::f64:
13959 return Subtarget->hasFP64();
13960 case MVT::v4f32:
13961 case MVT::v8f16:
13962 return Subtarget->hasMVEFloatOps();
13963 default:
13964 return false;
13965 }
13966}
13967
13968static SDValue PerformSHLSimplify(SDNode *N,
13969 TargetLowering::DAGCombinerInfo &DCI,
13970 const ARMSubtarget *ST) {
13971 // Allow the generic combiner to identify potential bswaps.
13972 if (DCI.isBeforeLegalize())
13973 return SDValue();
13974
13975 // DAG combiner will fold:
13976 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13977 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13978 // Other code patterns that can be also be modified have the following form:
13979 // b + ((a << 1) | 510)
13980 // b + ((a << 1) & 510)
13981 // b + ((a << 1) ^ 510)
13982 // b + ((a << 1) + 510)
13983
13984 // Many instructions can perform the shift for free, but it requires both
13985 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13986 // instruction will be needed. So, unfold back to the original pattern if:
13987 // - c1 and c2 are small enough that they don't require mov imms.
13988 // - the user(s) of the node can perform an shl.
13989
13990 // No shifted operands for 16-bit instructions.
13991 if (ST->isThumb() && ST->isThumb1Only())
13992 return SDValue();
13993
13994 // Check that all the users could perform the shl themselves.
13995 for (auto *U : N->users()) {
13996 switch(U->getOpcode()) {
13997 default:
13998 return SDValue();
13999 case ISD::SUB:
14000 case ISD::ADD:
14001 case ISD::AND:
14002 case ISD::OR:
14003 case ISD::XOR:
14004 case ISD::SETCC:
14005 case ARMISD::CMP:
14006 // Check that the user isn't already using a constant because there
14007 // aren't any instructions that support an immediate operand and a
14008 // shifted operand.
14009 if (isa<ConstantSDNode>(U->getOperand(0)) ||
14010 isa<ConstantSDNode>(U->getOperand(1)))
14011 return SDValue();
14012
14013 // Check that it's not already using a shift.
14014 if (U->getOperand(0).getOpcode() == ISD::SHL ||
14015 U->getOperand(1).getOpcode() == ISD::SHL)
14016 return SDValue();
14017 break;
14018 }
14019 }
14020
14021 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
14022 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
14023 return SDValue();
14024
14025 if (N->getOperand(0).getOpcode() != ISD::SHL)
14026 return SDValue();
14027
14028 SDValue SHL = N->getOperand(0);
14029
14030 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14031 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
14032 if (!C1ShlC2 || !C2)
14033 return SDValue();
14034
14035 APInt C2Int = C2->getAPIntValue();
14036 APInt C1Int = C1ShlC2->getAPIntValue();
14037 unsigned C2Width = C2Int.getBitWidth();
14038 if (C2Int.uge(C2Width))
14039 return SDValue();
14040 uint64_t C2Value = C2Int.getZExtValue();
14041
14042 // Check that performing a lshr will not lose any information.
14043 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
14044 if ((C1Int & Mask) != C1Int)
14045 return SDValue();
14046
14047 // Shift the first constant.
14048 C1Int.lshrInPlace(C2Int);
14049
14050 // The immediates are encoded as an 8-bit value that can be rotated.
14051 auto LargeImm = [](const APInt &Imm) {
14052 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
14053 return Imm.getBitWidth() - Zeros > 8;
14054 };
14055
14056 if (LargeImm(C1Int) || LargeImm(C2Int))
14057 return SDValue();
14058
14059 SelectionDAG &DAG = DCI.DAG;
14060 SDLoc dl(N);
14061 SDValue X = SHL.getOperand(0);
14062 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14063 DAG.getConstant(C1Int, dl, MVT::i32));
14064 // Shift left to compensate for the lshr of C1Int.
14065 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14066
14067 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14068 SHL.dump(); N->dump());
14069 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14070 return Res;
14071}
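// Illustrative sketch, not part of the original file: with c1 = 255 and c2 = 1
// this undoes the DAG combiner's fold, since (A << 1) + 510 == (A + 255) << 1;
// a user such as a final add can then absorb the left shift through a
// shifted-operand encoding instead of materialising 510. Hypothetical helper:
static inline unsigned shl_unfold_reference(unsigned A, unsigned B) {
  return B + ((A + 255u) << 1); // == B + ((A << 1) + 510)
}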
14072
14073
14074/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14075///
14076static SDValue PerformADDCombine(SDNode *N,
14077 TargetLowering::DAGCombinerInfo &DCI,
14078 const ARMSubtarget *Subtarget) {
14079 SDValue N0 = N->getOperand(0);
14080 SDValue N1 = N->getOperand(1);
14081
14082 // Only works one way, because it needs an immediate operand.
14083 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14084 return Result;
14085
14086 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14087 return Result;
14088
14089 // First try with the default operand order.
14090 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14091 return Result;
14092
14093 // If that didn't work, try again with the operands commuted.
14094 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14095}
14096
14097// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14098// providing -X is as cheap as X (currently, just a constant).
14099static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14100 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14101 return SDValue();
14102 SDValue CSINC = N->getOperand(1);
14103 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14104 return SDValue();
14105
14106 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14107 if (!X)
14108 return SDValue();
14109
14110 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14111 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14112 CSINC.getOperand(0)),
14113 CSINC.getOperand(1), CSINC.getOperand(2),
14114 CSINC.getOperand(3));
14115}
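// Illustrative sketch, not part of the original file: CSINC yields X or Y + 1,
// CSINV yields A or ~B, and -(Y + 1) == ~Y in two's complement, so negating a
// CSINC of a constant X is the CSINV built above. Hypothetical helper:
static inline unsigned csinc_neg_reference(bool CC, unsigned X, unsigned Y) {
  unsigned NegCSInc = 0u - (CC ? X : Y + 1u); // sub 0, (csinc X, Y, CC)
  unsigned CSInv = CC ? (0u - X) : ~Y;        // csinv -X, Y, CC
  return NegCSInc ^ CSInv;                    // always 0: the two forms agree
}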
14116
14117/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14118///
14119static SDValue PerformSUBCombine(SDNode *N,
14120 TargetLowering::DAGCombinerInfo &DCI,
14121 const ARMSubtarget *Subtarget) {
14122 SDValue N0 = N->getOperand(0);
14123 SDValue N1 = N->getOperand(1);
14124
14125 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
14126 if (N1.getNode()->hasOneUse())
14127 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14128 return Result;
14129
14130 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14131 return R;
14132
14133 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14134 return SDValue();
14135
14136 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14137 // so that we can readily pattern match more MVE instructions which can use
14138 // a scalar operand.
14139 SDValue VDup = N->getOperand(1);
14140 if (VDup->getOpcode() != ARMISD::VDUP)
14141 return SDValue();
14142
14143 SDValue VMov = N->getOperand(0);
14144 if (VMov->getOpcode() == ISD::BITCAST)
14145 VMov = VMov->getOperand(0);
14146
14147 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14148 return SDValue();
14149
14150 SDLoc dl(N);
14151 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14152 DCI.DAG.getConstant(0, dl, MVT::i32),
14153 VDup->getOperand(0));
14154 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14155}
14156
14157/// PerformVMULCombine
14158/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14159/// special multiplier accumulator forwarding.
14160/// vmul d3, d0, d2
14161/// vmla d3, d1, d2
14162/// is faster than
14163/// vadd d3, d0, d1
14164/// vmul d3, d3, d2
14165// However, for (A + B) * (A + B),
14166// vadd d2, d0, d1
14167// vmul d3, d0, d2
14168// vmla d3, d1, d2
14169// is slower than
14170// vadd d2, d0, d1
14171// vmul d3, d2, d2
14172static SDValue PerformVMULCombine(SDNode *N,
14173 TargetLowering::DAGCombinerInfo &DCI,
14174 const ARMSubtarget *Subtarget) {
14175 if (!Subtarget->hasVMLxForwarding())
14176 return SDValue();
14177
14178 SelectionDAG &DAG = DCI.DAG;
14179 SDValue N0 = N->getOperand(0);
14180 SDValue N1 = N->getOperand(1);
14181 unsigned Opcode = N0.getOpcode();
14182 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14183 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14184 Opcode = N1.getOpcode();
14185 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14186 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14187 return SDValue();
14188 std::swap(N0, N1);
14189 }
14190
14191 if (N0 == N1)
14192 return SDValue();
14193
14194 EVT VT = N->getValueType(0);
14195 SDLoc DL(N);
14196 SDValue N00 = N0->getOperand(0);
14197 SDValue N01 = N0->getOperand(1);
14198 return DAG.getNode(Opcode, DL, VT,
14199 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14200 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14201}
14202
14203static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14204 const ARMSubtarget *Subtarget) {
14205 EVT VT = N->getValueType(0);
14206 if (VT != MVT::v2i64)
14207 return SDValue();
14208
14209 SDValue N0 = N->getOperand(0);
14210 SDValue N1 = N->getOperand(1);
14211
14212 auto IsSignExt = [&](SDValue Op) {
14213 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14214 return SDValue();
14215 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14216 if (VT.getScalarSizeInBits() == 32)
14217 return Op->getOperand(0);
14218 return SDValue();
14219 };
14220 auto IsZeroExt = [&](SDValue Op) {
14221 // Zero extends are a little more awkward. At the point we are matching
14222 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14223 // That might be before or after a bitcast depending on how the and is
14224 // placed. Because this has to look through bitcasts, it is currently only
14225 // supported on LE.
14226 if (!Subtarget->isLittle())
14227 return SDValue();
14228
14229 SDValue And = Op;
14230 if (And->getOpcode() == ISD::BITCAST)
14231 And = And->getOperand(0);
14232 if (And->getOpcode() != ISD::AND)
14233 return SDValue();
14234 SDValue Mask = And->getOperand(1);
14235 if (Mask->getOpcode() == ISD::BITCAST)
14236 Mask = Mask->getOperand(0);
14237
14238 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14239 Mask.getValueType() != MVT::v4i32)
14240 return SDValue();
14241 if (isAllOnesConstant(Mask->getOperand(0)) &&
14242 isNullConstant(Mask->getOperand(1)) &&
14243 isAllOnesConstant(Mask->getOperand(2)) &&
14244 isNullConstant(Mask->getOperand(3)))
14245 return And->getOperand(0);
14246 return SDValue();
14247 };
14248
14249 SDLoc dl(N);
14250 if (SDValue Op0 = IsSignExt(N0)) {
14251 if (SDValue Op1 = IsSignExt(N1)) {
14252 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14253 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14254 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14255 }
14256 }
14257 if (SDValue Op0 = IsZeroExt(N0)) {
14258 if (SDValue Op1 = IsZeroExt(N1)) {
14259 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14260 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14261 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14262 }
14263 }
14264
14265 return SDValue();
14266}
14267
14268static SDValue PerformMULCombine(SDNode *N,
14269 TargetLowering::DAGCombinerInfo &DCI,
14270 const ARMSubtarget *Subtarget) {
14271 SelectionDAG &DAG = DCI.DAG;
14272
14273 EVT VT = N->getValueType(0);
14274 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14275 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14276
14277 if (Subtarget->isThumb1Only())
14278 return SDValue();
14279
14280 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14281 return SDValue();
14282
14283 if (VT.is64BitVector() || VT.is128BitVector())
14284 return PerformVMULCombine(N, DCI, Subtarget);
14285 if (VT != MVT::i32)
14286 return SDValue();
14287
14288 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14289 if (!C)
14290 return SDValue();
14291
14292 int64_t MulAmt = C->getSExtValue();
14293 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14294
14295 ShiftAmt = ShiftAmt & (32 - 1);
14296 SDValue V = N->getOperand(0);
14297 SDLoc DL(N);
14298
14299 SDValue Res;
14300 MulAmt >>= ShiftAmt;
14301
14302 if (MulAmt >= 0) {
14303 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14304 // (mul x, 2^N + 1) => (add (shl x, N), x)
14305 Res = DAG.getNode(ISD::ADD, DL, VT,
14306 V,
14307 DAG.getNode(ISD::SHL, DL, VT,
14308 V,
14309 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14310 MVT::i32)));
14311 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14312 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14313 Res = DAG.getNode(ISD::SUB, DL, VT,
14314 DAG.getNode(ISD::SHL, DL, VT,
14315 V,
14316 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14317 MVT::i32)),
14318 V);
14319 } else
14320 return SDValue();
14321 } else {
14322 uint64_t MulAmtAbs = -MulAmt;
14323 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14324 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14325 Res = DAG.getNode(ISD::SUB, DL, VT,
14326 V,
14327 DAG.getNode(ISD::SHL, DL, VT,
14328 V,
14329 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14330 MVT::i32)));
14331 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14332 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14333 Res = DAG.getNode(ISD::ADD, DL, VT,
14334 V,
14335 DAG.getNode(ISD::SHL, DL, VT,
14336 V,
14337 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14338 MVT::i32)));
14339 Res = DAG.getNode(ISD::SUB, DL, VT,
14340 DAG.getConstant(0, DL, MVT::i32), Res);
14341 } else
14342 return SDValue();
14343 }
14344
14345 if (ShiftAmt != 0)
14346 Res = DAG.getNode(ISD::SHL, DL, VT,
14347 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14348
14349 // Do not add new nodes to DAG combiner worklist.
14350 DCI.CombineTo(N, Res, false);
14351 return SDValue();
14352}
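// Illustrative sketch, not part of the original file, showing the
// decompositions above for a few constant multipliers:
//   X * 9  == (X << 3) + X         (2^N + 1)
//   X * 7  == (X << 3) - X         (2^N - 1)
//   X * 20 == ((X << 2) + X) << 2  (odd part first, trailing-zero shift last)
static inline unsigned mul_by_20_reference(unsigned X) {
  return ((X << 2) + X) << 2; // == X * 20
}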
14353
14354static SDValue CombineANDShift(SDNode *N,
14355 TargetLowering::DAGCombinerInfo &DCI,
14356 const ARMSubtarget *Subtarget) {
14357 // Allow DAGCombine to pattern-match before we touch the canonical form.
14358 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14359 return SDValue();
14360
14361 if (N->getValueType(0) != MVT::i32)
14362 return SDValue();
14363
14364 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14365 if (!N1C)
14366 return SDValue();
14367
14368 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14369 // Don't transform uxtb/uxth.
14370 if (C1 == 255 || C1 == 65535)
14371 return SDValue();
14372
14373 SDNode *N0 = N->getOperand(0).getNode();
14374 if (!N0->hasOneUse())
14375 return SDValue();
14376
14377 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14378 return SDValue();
14379
14380 bool LeftShift = N0->getOpcode() == ISD::SHL;
14381
14382 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14383 if (!N01C)
14384 return SDValue();
14385
14386 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14387 if (!C2 || C2 >= 32)
14388 return SDValue();
14389
14390 // Clear irrelevant bits in the mask.
14391 if (LeftShift)
14392 C1 &= (-1U << C2);
14393 else
14394 C1 &= (-1U >> C2);
14395
14396 SelectionDAG &DAG = DCI.DAG;
14397 SDLoc DL(N);
14398
14399 // We have a pattern of the form "(and (shl x, c2) c1)" or
14400 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14401 // transform to a pair of shifts, to save materializing c1.
14402
14403 // First pattern: right shift, then mask off leading bits.
14404 // FIXME: Use demanded bits?
14405 if (!LeftShift && isMask_32(C1)) {
14406 uint32_t C3 = llvm::countl_zero(C1);
14407 if (C2 < C3) {
14408 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14409 DAG.getConstant(C3 - C2, DL, MVT::i32));
14410 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14411 DAG.getConstant(C3, DL, MVT::i32));
14412 }
14413 }
14414
14415 // First pattern, reversed: left shift, then mask off trailing bits.
14416 if (LeftShift && isMask_32(~C1)) {
14417 uint32_t C3 = llvm::countr_zero(C1);
14418 if (C2 < C3) {
14419 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14420 DAG.getConstant(C3 - C2, DL, MVT::i32));
14421 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14422 DAG.getConstant(C3, DL, MVT::i32));
14423 }
14424 }
14425
14426 // Second pattern: left shift, then mask off leading bits.
14427 // FIXME: Use demanded bits?
14428 if (LeftShift && isShiftedMask_32(C1)) {
14429 uint32_t Trailing = llvm::countr_zero(C1);
14430 uint32_t C3 = llvm::countl_zero(C1);
14431 if (Trailing == C2 && C2 + C3 < 32) {
14432 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14433 DAG.getConstant(C2 + C3, DL, MVT::i32));
14434 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14435 DAG.getConstant(C3, DL, MVT::i32));
14436 }
14437 }
14438
14439 // Second pattern, reversed: right shift, then mask off trailing bits.
14440 // FIXME: Handle other patterns of known/demanded bits.
14441 if (!LeftShift && isShiftedMask_32(C1)) {
14442 uint32_t Leading = llvm::countl_zero(C1);
14443 uint32_t C3 = llvm::countr_zero(C1);
14444 if (Leading == C2 && C2 + C3 < 32) {
14445 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14446 DAG.getConstant(C2 + C3, DL, MVT::i32));
14447 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14448 DAG.getConstant(C3, DL, MVT::i32));
14449 }
14450 }
14451
14452 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14453 // if "c1 >> c2" is a cheaper immediate than "c1"
14454 if (LeftShift &&
14455 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14456
14457 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14458 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14459 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14460 DAG.getConstant(C2, DL, MVT::i32));
14461 }
14462
14463 return SDValue();
14464}
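// Illustrative sketch, not part of the original file: for the first pattern
// above with c2 = 3 and c1 = 0x1F, (X >> 3) & 0x1F == (X << 24) >> 27 on a
// 32-bit unsigned value, trading the mask constant for a second shift, which
// is cheaper to materialise in Thumb1. Hypothetical helper:
static inline unsigned and_srl_mask_reference(unsigned X) {
  return (X << 24) >> 27; // == (X >> 3) & 0x1F for 32-bit unsigned
}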
14465
14466static SDValue PerformANDCombine(SDNode *N,
14467 TargetLowering::DAGCombinerInfo &DCI,
14468 const ARMSubtarget *Subtarget) {
14469 // Attempt to use immediate-form VBIC
14470 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14471 SDLoc dl(N);
14472 EVT VT = N->getValueType(0);
14473 SelectionDAG &DAG = DCI.DAG;
14474
14475 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14476 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14477 return SDValue();
14478
14479 APInt SplatBits, SplatUndef;
14480 unsigned SplatBitSize;
14481 bool HasAnyUndefs;
14482 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14483 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14484 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14485 SplatBitSize == 64) {
14486 EVT VbicVT;
14487 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14488 SplatUndef.getZExtValue(), SplatBitSize,
14489 DAG, dl, VbicVT, VT, OtherModImm);
14490 if (Val.getNode()) {
14491 SDValue Input =
14492 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
14493 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14494 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
14495 }
14496 }
14497 }
14498
14499 if (!Subtarget->isThumb1Only()) {
14500 // fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
14501 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14502 return Result;
14503
14504 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14505 return Result;
14506 }
14507
14508 if (Subtarget->isThumb1Only())
14509 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14510 return Result;
14511
14512 return SDValue();
14513}
14514
14515// Try combining OR nodes to SMULWB, SMULWT.
14516static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14517 TargetLowering::DAGCombinerInfo &DCI,
14518 const ARMSubtarget *Subtarget) {
14519 if (!Subtarget->hasV6Ops() ||
14520 (Subtarget->isThumb() &&
14521 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14522 return SDValue();
14523
14524 SDValue SRL = OR->getOperand(0);
14525 SDValue SHL = OR->getOperand(1);
14526
14527 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14528 SRL = OR->getOperand(1);
14529 SHL = OR->getOperand(0);
14530 }
14531 if (!isSRL16(SRL) || !isSHL16(SHL))
14532 return SDValue();
14533
14534 // The first operands to the shifts need to be the two results from the
14535 // same smul_lohi node.
14536 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14537 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14538 return SDValue();
14539
14540 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14541 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14542 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14543 return SDValue();
14544
14545 // Now we have:
14546 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14547 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14548 // For SMULWB the 16-bit value will be sign extended somehow.
14549 // For SMULWT only the SRA is required.
14550 // Check both sides of SMUL_LOHI
14551 SDValue OpS16 = SMULLOHI->getOperand(0);
14552 SDValue OpS32 = SMULLOHI->getOperand(1);
14553
14554 SelectionDAG &DAG = DCI.DAG;
14555 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14556 OpS16 = OpS32;
14557 OpS32 = SMULLOHI->getOperand(0);
14558 }
14559
14560 SDLoc dl(OR);
14561 unsigned Opcode = 0;
14562 if (isS16(OpS16, DAG))
14563 Opcode = ARMISD::SMULWB;
14564 else if (isSRA16(OpS16)) {
14565 Opcode = ARMISD::SMULWT;
14566 OpS16 = OpS16->getOperand(0);
14567 }
14568 else
14569 return SDValue();
14570
14571 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14572 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14573 return SDValue(OR, 0);
14574}
14575
14576static SDValue PerformORCombineToBFI(SDNode *N,
14577 TargetLowering::DAGCombinerInfo &DCI,
14578 const ARMSubtarget *Subtarget) {
14579 // BFI is only available on V6T2+
14580 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14581 return SDValue();
14582
14583 EVT VT = N->getValueType(0);
14584 SDValue N0 = N->getOperand(0);
14585 SDValue N1 = N->getOperand(1);
14586 SelectionDAG &DAG = DCI.DAG;
14587 SDLoc DL(N);
14588 // 1) or (and A, mask), val => ARMbfi A, val, mask
14589 // iff (val & mask) == val
14590 //
14591 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14592 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14593 // && mask == ~mask2
14594 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14595 // && ~mask == mask2
14596 // (i.e., copy a bitfield value into another bitfield of the same width)
14597
14598 if (VT != MVT::i32)
14599 return SDValue();
14600
14601 SDValue N00 = N0.getOperand(0);
14602
14603 // The value and the mask need to be constants so we can verify this is
14604 // actually a bitfield set. If the mask is 0xffff, we can do better
14605 // via a movt instruction, so don't use BFI in that case.
14606 SDValue MaskOp = N0.getOperand(1);
14607 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14608 if (!MaskC)
14609 return SDValue();
14610 unsigned Mask = MaskC->getZExtValue();
14611 if (Mask == 0xffff)
14612 return SDValue();
14613 SDValue Res;
14614 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14615 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14616 if (N1C) {
14617 unsigned Val = N1C->getZExtValue();
14618 if ((Val & ~Mask) != Val)
14619 return SDValue();
14620
14621 if (ARM::isBitFieldInvertedMask(Mask)) {
14622 Val >>= llvm::countr_zero(~Mask);
14623
14624 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14625 DAG.getConstant(Val, DL, MVT::i32),
14626 DAG.getConstant(Mask, DL, MVT::i32));
14627
14628 DCI.CombineTo(N, Res, false);
14629 // Return value from the original node to inform the combiner that N is
14630 // now dead.
14631 return SDValue(N, 0);
14632 }
14633 } else if (N1.getOpcode() == ISD::AND) {
14634 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14635 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14636 if (!N11C)
14637 return SDValue();
14638 unsigned Mask2 = N11C->getZExtValue();
14639
14640 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14641 // as is to match.
14642 if (ARM::isBitFieldInvertedMask(Mask) &&
14643 (Mask == ~Mask2)) {
14644 // The pack halfword instruction works better for masks that fit it,
14645 // so use that when it's available.
14646 if (Subtarget->hasDSP() &&
14647 (Mask == 0xffff || Mask == 0xffff0000))
14648 return SDValue();
14649 // 2a
14650 unsigned amt = llvm::countr_zero(Mask2);
14651 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14652 DAG.getConstant(amt, DL, MVT::i32));
14653 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14654 DAG.getConstant(Mask, DL, MVT::i32));
14655 DCI.CombineTo(N, Res, false);
14656 // Return value from the original node to inform the combiner that N is
14657 // now dead.
14658 return SDValue(N, 0);
14659 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14660 (~Mask == Mask2)) {
14661 // The pack halfword instruction works better for masks that fit it,
14662 // so use that when it's available.
14663 if (Subtarget->hasDSP() &&
14664 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14665 return SDValue();
14666 // 2b
14667 unsigned lsb = llvm::countr_zero(Mask);
14668 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14669 DAG.getConstant(lsb, DL, MVT::i32));
14670 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14671 DAG.getConstant(Mask2, DL, MVT::i32));
14672 DCI.CombineTo(N, Res, false);
14673 // Return value from the original node to inform the combiner that N is
14674 // now dead.
14675 return SDValue(N, 0);
14676 }
14677 }
14678
14679 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14680 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14681 ARM::isBitFieldInvertedMask(~Mask)) {
14682 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14683 // where lsb(mask) == #shamt and masked bits of B are known zero.
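// Illustrative example with assumed values: for mask = 0x0000ff00 and
// #shamt = 8, (shl A, 8) & 0xff00 places A[7:0] into bits [15:8]; if those
// bits of B are known zero, the OR is equivalent to ARMbfi B, A, ~mask.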
14684 SDValue ShAmt = N00.getOperand(1);
14685 unsigned ShAmtC = ShAmt->getAsZExtVal();
14686 unsigned LSB = llvm::countr_zero(Mask);
14687 if (ShAmtC != LSB)
14688 return SDValue();
14689
14690 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14691 DAG.getConstant(~Mask, DL, MVT::i32));
14692
14693 DCI.CombineTo(N, Res, false);
14694 // Return value from the original node to inform the combiner that N is
14695 // now dead.
14696 return SDValue(N, 0);
14697 }
14698
14699 return SDValue();
14700}
14701
14702static bool isValidMVECond(unsigned CC, bool IsFloat) {
14703 switch (CC) {
14704 case ARMCC::EQ:
14705 case ARMCC::NE:
14706 case ARMCC::LE:
14707 case ARMCC::GT:
14708 case ARMCC::GE:
14709 case ARMCC::LT:
14710 return true;
14711 case ARMCC::HS:
14712 case ARMCC::HI:
14713 return !IsFloat;
14714 default:
14715 return false;
14716 };
14717}
14718
14719 static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14720 if (N->getOpcode() == ARMISD::VCMP)
14721 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14722 else if (N->getOpcode() == ARMISD::VCMPZ)
14723 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14724 else
14725 llvm_unreachable("Not a VCMP/VCMPZ!");
14726}
14727
14728 static bool CanInvertMVEVCMP(SDValue N) {
14729 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14730 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14731}
14732
14733 static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14734 const ARMSubtarget *Subtarget) {
14735 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14736 // together with predicates
14737 EVT VT = N->getValueType(0);
14738 SDLoc DL(N);
14739 SDValue N0 = N->getOperand(0);
14740 SDValue N1 = N->getOperand(1);
14741
14742 auto IsFreelyInvertable = [&](SDValue V) {
14743 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14744 return CanInvertMVEVCMP(V);
14745 return false;
14746 };
14747
14748 // At least one operand must be freely invertible.
14749 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14750 return SDValue();
14751
14752 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14753 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14754 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14755 return DAG.getLogicalNOT(DL, And, VT);
14756}
14757
14758/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14759 static SDValue PerformORCombine(SDNode *N,
14760 TargetLowering::DAGCombinerInfo &DCI,
14761 const ARMSubtarget *Subtarget) {
14762 // Attempt to use immediate-form VORR
14763 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14764 SDLoc dl(N);
14765 EVT VT = N->getValueType(0);
14766 SelectionDAG &DAG = DCI.DAG;
14767
14768 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14769 return SDValue();
14770
14771 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14772 VT == MVT::v8i1 || VT == MVT::v16i1))
14773 return PerformORCombine_i1(N, DAG, Subtarget);
14774
14775 APInt SplatBits, SplatUndef;
14776 unsigned SplatBitSize;
14777 bool HasAnyUndefs;
14778 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14779 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14780 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14781 SplatBitSize == 64) {
14782 EVT VorrVT;
14783 SDValue Val =
14784 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14785 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14786 if (Val.getNode()) {
14787 SDValue Input =
14788 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
14789 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14790 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
14791 }
14792 }
14793 }
14794
14795 if (!Subtarget->isThumb1Only()) {
14796 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14797 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14798 return Result;
14799 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14800 return Result;
14801 }
14802
14803 SDValue N0 = N->getOperand(0);
14804 SDValue N1 = N->getOperand(1);
14805
14806 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
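// In other words this forms a bitwise select: each result bit is taken from B
// where the constant A has a 1 and from C where A has a 0.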
14807 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14808 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14809
14810 // The code below optimizes (or (and X, Y), Z).
14811 // The AND operand needs to have a single user to make these optimizations
14812 // profitable.
14813 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14814 return SDValue();
14815
14816 APInt SplatUndef;
14817 unsigned SplatBitSize;
14818 bool HasAnyUndefs;
14819
14820 APInt SplatBits0, SplatBits1;
14821 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14822 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14823 // Ensure that the second operands of both ANDs are constants.
14824 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14825 HasAnyUndefs) && !HasAnyUndefs) {
14826 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14827 HasAnyUndefs) && !HasAnyUndefs) {
14828 // Ensure that the bit width of the constants are the same and that
14829 // the splat arguments are logical inverses as per the pattern we
14830 // are trying to simplify.
14831 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14832 SplatBits0 == ~SplatBits1) {
14833 // Canonicalize the vector type to make instruction selection
14834 // simpler.
14835 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14836 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14837 N0->getOperand(1),
14838 N0->getOperand(0),
14839 N1->getOperand(0));
14840 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14841 }
14842 }
14843 }
14844 }
14845
14846 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14847 // reasonable.
14848 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14849 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14850 return Res;
14851 }
14852
14853 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14854 return Result;
14855
14856 return SDValue();
14857}
14858
14859 static SDValue PerformXORCombine(SDNode *N,
14860 TargetLowering::DAGCombinerInfo &DCI,
14861 const ARMSubtarget *Subtarget) {
14862 EVT VT = N->getValueType(0);
14863 SelectionDAG &DAG = DCI.DAG;
14864
14865 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14866 return SDValue();
14867
14868 if (!Subtarget->isThumb1Only()) {
14869 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14870 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14871 return Result;
14872
14873 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14874 return Result;
14875 }
14876
14877 if (Subtarget->hasMVEIntegerOps()) {
14878 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
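// For example (illustrative): xor (vcmp eq a, b), (splat 1) can become
// vcmp ne a, b, provided the inverted condition is valid for the operand
// type (see CanInvertMVEVCMP above).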
14879 SDValue N0 = N->getOperand(0);
14880 SDValue N1 = N->getOperand(1);
14881 const TargetLowering *TLI = Subtarget->getTargetLowering();
14882 if (TLI->isConstTrueVal(N1) &&
14883 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14884 if (CanInvertMVEVCMP(N0)) {
14885 SDLoc DL(N0);
14886 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14887
14888 SmallVector<SDValue, 4> Ops;
14889 Ops.push_back(N0->getOperand(0));
14890 if (N0->getOpcode() == ARMISD::VCMP)
14891 Ops.push_back(N0->getOperand(1));
14892 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14893 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14894 }
14895 }
14896 }
14897
14898 return SDValue();
14899}
14900
14901// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14902// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14903// their position in "to" (Rd).
14904static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14905 assert(N->getOpcode() == ARMISD::BFI);
14906
14907 SDValue From = N->getOperand(1);
14908 ToMask = ~N->getConstantOperandAPInt(2);
14909 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14910
14911 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14912 // #C in the base of the SHR.
14913 if (From->getOpcode() == ISD::SRL &&
14914 isa<ConstantSDNode>(From->getOperand(1))) {
14915 APInt Shift = From->getConstantOperandAPInt(1);
14916 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14917 FromMask <<= Shift.getLimitedValue(31);
14918 From = From->getOperand(0);
14919 }
14920
14921 return From;
14922}
14923
14924 // If A and B each contain one contiguous run of set bits, is A | B their
14925 // concatenation A . B?
14926 // Neither A nor B may be zero.
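// Illustrative example with assumed values: A = 0x00f0 (lowest set bit 4) and
// B = 0x000f (highest set bit 3) concatenate properly, since 4 - 1 == 3.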
14927static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14928 unsigned LastActiveBitInA = A.countr_zero();
14929 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14930 return LastActiveBitInA - 1 == FirstActiveBitInB;
14931}
14932
14933 static SDValue FindBFIToCombineWith(SDNode *N) {
14934 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14935 APInt ToMask, FromMask;
14936 SDValue From = ParseBFI(N, ToMask, FromMask);
14937 SDValue To = N->getOperand(0);
14938
14939 SDValue V = To;
14940 if (V.getOpcode() != ARMISD::BFI)
14941 return SDValue();
14942
14943 APInt NewToMask, NewFromMask;
14944 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14945 if (NewFrom != From)
14946 return SDValue();
14947
14948 // Do the written bits conflict with any we've seen so far?
14949 if ((NewToMask & ToMask).getBoolValue())
14950 // Conflicting bits.
14951 return SDValue();
14952
14953 // Are the new bits contiguous when combined with the old bits?
14954 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14955 BitsProperlyConcatenate(FromMask, NewFromMask))
14956 return V;
14957 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14958 BitsProperlyConcatenate(NewFromMask, FromMask))
14959 return V;
14960
14961 return SDValue();
14962}
14963
14964 static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14965 SDValue N0 = N->getOperand(0);
14966 SDValue N1 = N->getOperand(1);
14967
14968 if (N1.getOpcode() == ISD::AND) {
14969 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14970 // the bits being cleared by the AND are not demanded by the BFI.
14971 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14972 if (!N11C)
14973 return SDValue();
14974 unsigned InvMask = N->getConstantOperandVal(2);
14975 unsigned LSB = llvm::countr_zero(~InvMask);
14976 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14977 assert(Width <
14978 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14979 "undefined behavior");
14980 unsigned Mask = (1u << Width) - 1;
14981 unsigned Mask2 = N11C->getZExtValue();
14982 if ((Mask & (~Mask2)) == 0)
14983 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14984 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14985 return SDValue();
14986 }
14987
14988 // Look for another BFI to combine with.
14989 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14990 // We've found a BFI.
14991 APInt ToMask1, FromMask1;
14992 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14993
14994 APInt ToMask2, FromMask2;
14995 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14996 assert(From1 == From2);
14997 (void)From2;
14998
14999 // Create a new BFI, combining the two together.
15000 APInt NewFromMask = FromMask1 | FromMask2;
15001 APInt NewToMask = ToMask1 | ToMask2;
15002
15003 EVT VT = N->getValueType(0);
15004 SDLoc dl(N);
15005
15006 if (NewFromMask[0] == 0)
15007 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
15008 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
15009 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
15010 DAG.getConstant(~NewToMask, dl, VT));
15011 }
15012
15013 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
15014 // that lower bit insertions are performed first, provided that M1 and M2
15015 // do not overlap. This can allow multiple BFI instructions to be combined
15016 // together by the other folds above.
15017 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
15018 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
15019 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
15020
15021 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
15022 ToMask1.countl_zero() < ToMask2.countl_zero())
15023 return SDValue();
15024
15025 EVT VT = N->getValueType(0);
15026 SDLoc dl(N);
15027 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
15028 N->getOperand(1), N->getOperand(2));
15029 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
15030 N0.getOperand(2));
15031 }
15032
15033 return SDValue();
15034}
15035
15036// Check that N is CMPZ(CSINC(0, 0, CC, X)),
15037// or CMPZ(CMOV(1, 0, CC, X))
15038// return X if valid.
15039 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
15040 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
15041 return SDValue();
15042 SDValue CSInc = Cmp->getOperand(0);
15043
15044 // Ignore any `And 1` nodes that may not yet have been removed. We are
15045 // looking for a value that produces 1/0, so these have no effect on the
15046 // code.
15047 while (CSInc.getOpcode() == ISD::AND &&
15048 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
15049 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15050 CSInc = CSInc.getOperand(0);
15051
15052 if (CSInc.getOpcode() == ARMISD::CSINC &&
15053 isNullConstant(CSInc.getOperand(0)) &&
15054 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15055 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15056 return CSInc.getOperand(3);
15057 }
15058 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15059 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15060 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15061 return CSInc.getOperand(3);
15062 }
15063 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15064 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15065 CC = ARMCC::getOppositeCondition(
15066 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
15067 return CSInc.getOperand(3);
15068 }
15069 return SDValue();
15070}
15071
15072 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
15073 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15074 // t92: flags = ARMISD::CMPZ t74, 0
15075 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15076 // t96: flags = ARMISD::CMPZ t93, 0
15077 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15078 ARMCC::CondCodes Cond;
15079 if (SDValue C = IsCMPZCSINC(N, Cond))
15080 if (Cond == ARMCC::EQ)
15081 return C;
15082 return SDValue();
15083}
15084
15085 static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
15086 // Fold away an unnecessary CMPZ/CSINC
15087 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15088 // if C1==EQ -> CSXYZ A, B, C2, D
15089 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15090 ARMCC::CondCodes Cond;
15091 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15092 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15093 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15094 N->getOperand(1),
15095 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15096 if (N->getConstantOperandVal(2) == ARMCC::NE)
15097 return DAG.getNode(
15098 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15099 N->getOperand(1),
15100 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15101 }
15102 return SDValue();
15103}
15104
15105/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15106/// ARMISD::VMOVRRD.
15107 static SDValue PerformVMOVRRDCombine(SDNode *N,
15108 TargetLowering::DAGCombinerInfo &DCI,
15109 const ARMSubtarget *Subtarget) {
15110 // vmovrrd(vmovdrr x, y) -> x,y
15111 SDValue InDouble = N->getOperand(0);
15112 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15113 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15114
15115 // vmovrrd(load f64) -> (load i32), (load i32)
15116 SDNode *InNode = InDouble.getNode();
15117 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15118 InNode->getValueType(0) == MVT::f64 &&
15119 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15120 !cast<LoadSDNode>(InNode)->isVolatile()) {
15121 // TODO: Should this be done for non-FrameIndex operands?
15122 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15123
15124 SelectionDAG &DAG = DCI.DAG;
15125 SDLoc DL(LD);
15126 SDValue BasePtr = LD->getBasePtr();
15127 SDValue NewLD1 =
15128 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15129 LD->getAlign(), LD->getMemOperand()->getFlags());
15130
15131 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15132 DAG.getConstant(4, DL, MVT::i32));
15133
15134 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15135 LD->getPointerInfo().getWithOffset(4),
15136 commonAlignment(LD->getAlign(), 4),
15137 LD->getMemOperand()->getFlags());
15138
15139 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15140 if (DCI.DAG.getDataLayout().isBigEndian())
15141 std::swap (NewLD1, NewLD2);
15142 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15143 return Result;
15144 }
15145
15146 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15147 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15148 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15149 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15150 SDValue BV = InDouble.getOperand(0);
15151 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15152 // change lane order under big endian.
15153 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15154 while (
15155 (BV.getOpcode() == ISD::BITCAST ||
15156 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15157 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15158 BVSwap = BV.getOpcode() == ISD::BITCAST;
15159 BV = BV.getOperand(0);
15160 }
15161 if (BV.getValueType() != MVT::v4i32)
15162 return SDValue();
15163
15164 // Handle buildvectors, pulling out the correct lane depending on
15165 // endianness.
15166 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15167 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15168 SDValue Op0 = BV.getOperand(Offset);
15169 SDValue Op1 = BV.getOperand(Offset + 1);
15170 if (!Subtarget->isLittle() && BVSwap)
15171 std::swap(Op0, Op1);
15172
15173 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15174 }
15175
15176 // A chain of insert_vectors, grabbing the correct value of the chain of
15177 // inserts.
15178 SDValue Op0, Op1;
15179 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15180 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15181 if (BV.getConstantOperandVal(2) == Offset && !Op0)
15182 Op0 = BV.getOperand(1);
15183 if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
15184 Op1 = BV.getOperand(1);
15185 }
15186 BV = BV.getOperand(0);
15187 }
15188 if (!Subtarget->isLittle() && BVSwap)
15189 std::swap(Op0, Op1);
15190 if (Op0 && Op1)
15191 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15192 }
15193
15194 return SDValue();
15195}
15196
15197/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15198/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15199 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15200 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15201 SDValue Op0 = N->getOperand(0);
15202 SDValue Op1 = N->getOperand(1);
15203 if (Op0.getOpcode() == ISD::BITCAST)
15204 Op0 = Op0.getOperand(0);
15205 if (Op1.getOpcode() == ISD::BITCAST)
15206 Op1 = Op1.getOperand(0);
15207 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15208 Op0.getNode() == Op1.getNode() &&
15209 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15210 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15211 N->getValueType(0), Op0.getOperand(0));
15212 return SDValue();
15213}
15214
15215 static SDValue PerformVMOVhrCombine(SDNode *N,
15216 TargetLowering::DAGCombinerInfo &DCI) {
15217 SDValue Op0 = N->getOperand(0);
15218
15219 // VMOVhr (VMOVrh (X)) -> X
15220 if (Op0->getOpcode() == ARMISD::VMOVrh)
15221 return Op0->getOperand(0);
15222
15223 // FullFP16: half values are passed in S-registers, and we don't
15224 // need any of the bitcast and moves:
15225 //
15226 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15227 // t5: i32 = bitcast t2
15228 // t18: f16 = ARMISD::VMOVhr t5
15229 // =>
15230 // tN: f16,ch2,gl2? = CopyFromReg ch, Register:f32 %0, gl?
15231 if (Op0->getOpcode() == ISD::BITCAST) {
15232 SDValue Copy = Op0->getOperand(0);
15233 if (Copy.getValueType() == MVT::f32 &&
15234 Copy->getOpcode() == ISD::CopyFromReg) {
15235 bool HasGlue = Copy->getNumOperands() == 3;
15236 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15237 HasGlue ? Copy->getOperand(2) : SDValue()};
15238 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15239 SDValue NewCopy =
15240 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15241 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15242 ArrayRef(Ops, HasGlue ? 3 : 2));
15243
15244 // Update Users, Chains, and Potential Glue.
15245 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15246 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15247 if (HasGlue)
15248 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15249 NewCopy.getValue(2));
15250
15251 return NewCopy;
15252 }
15253 }
15254
15255 // fold (VMOVhr (load x)) -> (load (f16*)x)
15256 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15257 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15258 LN0->getMemoryVT() == MVT::i16) {
15259 SDValue Load =
15260 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15261 LN0->getBasePtr(), LN0->getMemOperand());
15262 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15263 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15264 return Load;
15265 }
15266 }
15267
15268 // Only the bottom 16 bits of the source register are used.
15269 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15270 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15271 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15272 return SDValue(N, 0);
15273
15274 return SDValue();
15275}
15276
15277 static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15278 SDValue N0 = N->getOperand(0);
15279 EVT VT = N->getValueType(0);
15280
15281 // fold (VMOVrh (fpconst x)) -> const x
15282 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15283 APFloat V = C->getValueAPF();
15284 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15285 }
15286
15287 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15288 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15289 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15290
15291 SDValue Load =
15292 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15293 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15294 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15295 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15296 return Load;
15297 }
15298
15299 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15300 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15301 isa<ConstantSDNode>(N0->getOperand(1)))
15302 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15303 N0->getOperand(1));
15304
15305 return SDValue();
15306}
15307
15308/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15309/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15310/// i64 vector to have f64 elements, since the value can then be loaded
15311/// directly into a VFP register.
15312 static bool hasNormalLoadOperand(SDNode *N) {
15313 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15314 for (unsigned i = 0; i < NumElts; ++i) {
15315 SDNode *Elt = N->getOperand(i).getNode();
15316 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15317 return true;
15318 }
15319 return false;
15320}
15321
15322/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15323/// ISD::BUILD_VECTOR.
15324 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15325 TargetLowering::DAGCombinerInfo &DCI,
15326 const ARMSubtarget *Subtarget) {
15327 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15328 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15329 // into a pair of GPRs, which is fine when the value is used as a scalar,
15330 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15331 SelectionDAG &DAG = DCI.DAG;
15332 if (N->getNumOperands() == 2)
15333 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15334 return RV;
15335
15336 // Load i64 elements as f64 values so that type legalization does not split
15337 // them up into i32 values.
15338 EVT VT = N->getValueType(0);
15339 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15340 return SDValue();
15341 SDLoc dl(N);
15342 SmallVector<SDValue, 8> Ops;
15343 unsigned NumElts = VT.getVectorNumElements();
15344 for (unsigned i = 0; i < NumElts; ++i) {
15345 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15346 Ops.push_back(V);
15347 // Make the DAGCombiner fold the bitcast.
15348 DCI.AddToWorklist(V.getNode());
15349 }
15350 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15351 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15352 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15353}
15354
15355/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15356static SDValue
15357 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15358 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15359 // At that time, we may have inserted bitcasts from integer to float.
15360 // If these bitcasts have survived DAGCombine, change the lowering of this
15361 // BUILD_VECTOR in something more vector friendly, i.e., that does not
15362 // force to use floating point types.
15363
15364 // Make sure we can change the type of the vector.
15365 // This is possible iff:
15366 // 1. The vector is only used in a bitcast to a integer type. I.e.,
15367 // 1.1. Vector is used only once.
15368 // 1.2. Use is a bit convert to an integer type.
15369 // 2. The size of its operands are 32-bits (64-bits are not legal).
15370 EVT VT = N->getValueType(0);
15371 EVT EltVT = VT.getVectorElementType();
15372
15373 // Check 1.1. and 2.
15374 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15375 return SDValue();
15376
15377 // By construction, the input type must be float.
15378 assert(EltVT == MVT::f32 && "Unexpected type!");
15379
15380 // Check 1.2.
15381 SDNode *Use = *N->user_begin();
15382 if (Use->getOpcode() != ISD::BITCAST ||
15383 Use->getValueType(0).isFloatingPoint())
15384 return SDValue();
15385
15386 // Check profitability.
15387 // Model is, if more than half of the relevant operands are bitcast from
15388 // i32, turn the build_vector into a sequence of insert_vector_elt.
15389 // Relevant operands are everything that is not statically
15390 // (i.e., at compile time) bitcasted.
15391 unsigned NumOfBitCastedElts = 0;
15392 unsigned NumElts = VT.getVectorNumElements();
15393 unsigned NumOfRelevantElts = NumElts;
15394 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15395 SDValue Elt = N->getOperand(Idx);
15396 if (Elt->getOpcode() == ISD::BITCAST) {
15397 // Assume only bit cast to i32 will go away.
15398 if (Elt->getOperand(0).getValueType() == MVT::i32)
15399 ++NumOfBitCastedElts;
15400 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15401 // Constants are statically casted, thus do not count them as
15402 // relevant operands.
15403 --NumOfRelevantElts;
15404 }
15405
15406 // Check if more than half of the elements require a non-free bitcast.
15407 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15408 return SDValue();
15409
15410 SelectionDAG &DAG = DCI.DAG;
15411 // Create the new vector type.
15412 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15413 // Check if the type is legal.
15414 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15415 if (!TLI.isTypeLegal(VecVT))
15416 return SDValue();
15417
15418 // Combine:
15419 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15420 // => BITCAST INSERT_VECTOR_ELT
15421 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15422 // (BITCAST EN), N.
15423 SDValue Vec = DAG.getUNDEF(VecVT);
15424 SDLoc dl(N);
15425 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15426 SDValue V = N->getOperand(Idx);
15427 if (V.isUndef())
15428 continue;
15429 if (V.getOpcode() == ISD::BITCAST &&
15430 V->getOperand(0).getValueType() == MVT::i32)
15431 // Fold obvious case.
15432 V = V.getOperand(0);
15433 else {
15434 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15435 // Make the DAGCombiner fold the bitcasts.
15436 DCI.AddToWorklist(V.getNode());
15437 }
15438 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15439 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15440 }
15441 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15442 // Make the DAGCombiner fold the bitcasts.
15443 DCI.AddToWorklist(Vec.getNode());
15444 return Vec;
15445}
15446
15447static SDValue
15448 PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15449 EVT VT = N->getValueType(0);
15450 SDValue Op = N->getOperand(0);
15451 SDLoc dl(N);
15452
15453 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15454 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15455 // If the valuetypes are the same, we can remove the cast entirely.
15456 if (Op->getOperand(0).getValueType() == VT)
15457 return Op->getOperand(0);
15458 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15459 }
15460
15461 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15462 // more VPNOT which might get folded as else predicates.
15463 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15464 SDValue X =
15465 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15466 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15467 DCI.DAG.getConstant(65535, dl, MVT::i32));
15468 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15469 }
15470
15471 // Only the bottom 16 bits of the source register are used.
15472 if (Op.getValueType() == MVT::i32) {
15473 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15474 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15475 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15476 return SDValue(N, 0);
15477 }
15478 return SDValue();
15479}
15480
15481 static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15482 const ARMSubtarget *ST) {
15483 EVT VT = N->getValueType(0);
15484 SDValue Op = N->getOperand(0);
15485 SDLoc dl(N);
15486
15487 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15488 if (ST->isLittle())
15489 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15490
15491 // VT VECTOR_REG_CAST (VT Op) -> Op
15492 if (Op.getValueType() == VT)
15493 return Op;
15494 // VECTOR_REG_CAST undef -> undef
15495 if (Op.isUndef())
15496 return DAG.getUNDEF(VT);
15497
15498 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15499 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15500 // If the valuetypes are the same, we can remove the cast entirely.
15501 if (Op->getOperand(0).getValueType() == VT)
15502 return Op->getOperand(0);
15503 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15504 }
15505
15506 return SDValue();
15507}
15508
15509 static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15510 const ARMSubtarget *Subtarget) {
15511 if (!Subtarget->hasMVEIntegerOps())
15512 return SDValue();
15513
15514 EVT VT = N->getValueType(0);
15515 SDValue Op0 = N->getOperand(0);
15516 SDValue Op1 = N->getOperand(1);
15517 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15518 SDLoc dl(N);
15519
15520 // vcmp X, 0, cc -> vcmpz X, cc
15521 if (isZeroVector(Op1))
15522 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15523
15524 unsigned SwappedCond = getSwappedCondition(Cond);
15525 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15526 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15527 if (isZeroVector(Op0))
15528 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15529 DAG.getConstant(SwappedCond, dl, MVT::i32));
15530 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15531 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15532 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15533 DAG.getConstant(SwappedCond, dl, MVT::i32));
15534 }
15535
15536 return SDValue();
15537}
15538
15539/// PerformInsertEltCombine - Target-specific dag combine xforms for
15540/// ISD::INSERT_VECTOR_ELT.
15541 static SDValue PerformInsertEltCombine(SDNode *N,
15542 TargetLowering::DAGCombinerInfo &DCI) {
15543 // Bitcast an i64 load inserted into a vector to f64.
15544 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15545 EVT VT = N->getValueType(0);
15546 SDNode *Elt = N->getOperand(1).getNode();
15547 if (VT.getVectorElementType() != MVT::i64 ||
15548 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15549 return SDValue();
15550
15551 SelectionDAG &DAG = DCI.DAG;
15552 SDLoc dl(N);
15553 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15554 VT.getVectorNumElements());
15555 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15556 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15557 // Make the DAGCombiner fold the bitcasts.
15558 DCI.AddToWorklist(Vec.getNode());
15559 DCI.AddToWorklist(V.getNode());
15560 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15561 Vec, V, N->getOperand(2));
15562 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15563}
15564
15565// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15566// directly or bitcast to an integer if the original is a float vector.
15567// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15568// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15569static SDValue
15570 PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15571 EVT VT = N->getValueType(0);
15572 SDLoc dl(N);
15573
15574 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15575 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15576 return SDValue();
15577
15578 SDValue Ext = SDValue(N, 0);
15579 if (Ext.getOpcode() == ISD::BITCAST &&
15580 Ext.getOperand(0).getValueType() == MVT::f32)
15581 Ext = Ext.getOperand(0);
15582 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15583 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15584 Ext.getConstantOperandVal(1) % 2 != 0)
15585 return SDValue();
15586 if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||
15587 Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))
15588 return SDValue();
15589
15590 SDValue Op0 = Ext.getOperand(0);
15591 EVT VecVT = Op0.getValueType();
15592 unsigned ResNo = Op0.getResNo();
15593 unsigned Lane = Ext.getConstantOperandVal(1);
15594 if (VecVT.getVectorNumElements() != 4)
15595 return SDValue();
15596
15597 // Find another extract, of Lane + 1
15598 auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {
15599 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15600 isa<ConstantSDNode>(V->getOperand(1)) &&
15601 V->getConstantOperandVal(1) == Lane + 1 &&
15602 V->getOperand(0).getResNo() == ResNo;
15603 });
15604 if (OtherIt == Op0->users().end())
15605 return SDValue();
15606
15607 // For float extracts, we need to be converting to a i32 for both vector
15608 // lanes.
15609 SDValue OtherExt(*OtherIt, 0);
15610 if (OtherExt.getValueType() != MVT::i32) {
15611 if (!OtherExt->hasOneUse() ||
15612 OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||
15613 OtherExt->user_begin()->getValueType(0) != MVT::i32)
15614 return SDValue();
15615 OtherExt = SDValue(*OtherExt->user_begin(), 0);
15616 }
15617
15618 // Convert the type to a f64 and extract with a VMOVRRD.
15619 SDValue F64 = DCI.DAG.getNode(
15620 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15621 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15622 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15623 SDValue VMOVRRD =
15624 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15625
15626 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15627 return VMOVRRD;
15628}
15629
15630 static SDValue PerformExtractEltCombine(SDNode *N,
15631 TargetLowering::DAGCombinerInfo &DCI,
15632 const ARMSubtarget *ST) {
15633 SDValue Op0 = N->getOperand(0);
15634 EVT VT = N->getValueType(0);
15635 SDLoc dl(N);
15636
15637 // extract (vdup x) -> x
15638 if (Op0->getOpcode() == ARMISD::VDUP) {
15639 SDValue X = Op0->getOperand(0);
15640 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15641 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15642 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15643 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15644 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15645 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15646
15647 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15648 X = X->getOperand(0);
15649 if (X.getValueType() == VT)
15650 return X;
15651 }
15652
15653 // extract ARM_BUILD_VECTOR -> x
15654 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15655 isa<ConstantSDNode>(N->getOperand(1)) &&
15656 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15657 return Op0.getOperand(N->getConstantOperandVal(1));
15658 }
15659
15660 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15661 if (Op0.getValueType() == MVT::v4i32 &&
15662 isa<ConstantSDNode>(N->getOperand(1)) &&
15663 Op0.getOpcode() == ISD::BITCAST &&
15664 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15665 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15666 SDValue BV = Op0.getOperand(0);
15667 unsigned Offset = N->getConstantOperandVal(1);
15668 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15669 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15670 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15671 }
15672
15673 // extract x, n; extract x, n+1 -> VMOVRRD x
15674 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15675 return R;
15676
15677 // extract (MVETrunc(x)) -> extract x
15678 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15679 unsigned Idx = N->getConstantOperandVal(1);
15680 unsigned Vec =
15681 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15682 unsigned SubIdx =
15683 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15684 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15685 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15686 }
15687
15688 return SDValue();
15689}
15690
15691 static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15692 SDValue Op = N->getOperand(0);
15693 EVT VT = N->getValueType(0);
15694
15695 // sext_inreg(VGETLANEu) -> VGETLANEs
15696 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15697 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15698 Op.getOperand(0).getValueType().getScalarType())
15699 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15700 Op.getOperand(1));
15701
15702 return SDValue();
15703}
15704
15705static SDValue
15706 PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15707 SDValue Vec = N->getOperand(0);
15708 SDValue SubVec = N->getOperand(1);
15709 uint64_t IdxVal = N->getConstantOperandVal(2);
15710 EVT VecVT = Vec.getValueType();
15711 EVT SubVT = SubVec.getValueType();
15712
15713 // Only do this for legal fixed vector types.
15714 if (!VecVT.isFixedLengthVector() ||
15715 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15716 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15717 return SDValue();
15718
15719 // Ignore widening patterns.
15720 if (IdxVal == 0 && Vec.isUndef())
15721 return SDValue();
15722
15723 // Subvector must be half the width and an "aligned" insertion.
15724 unsigned NumSubElts = SubVT.getVectorNumElements();
15725 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15726 (IdxVal != 0 && IdxVal != NumSubElts))
15727 return SDValue();
15728
15729 // Fold insert_subvector -> concat_vectors
15730 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15731 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15732 SDLoc DL(N);
15733 SDValue Lo, Hi;
15734 if (IdxVal == 0) {
15735 Lo = SubVec;
15736 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15737 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15738 } else {
15739 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15740 DCI.DAG.getVectorIdxConstant(0, DL));
15741 Hi = SubVec;
15742 }
15743 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15744}
15745
15746// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15747 static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15748 SelectionDAG &DAG) {
15749 SDValue Trunc = N->getOperand(0);
15750 EVT VT = Trunc.getValueType();
15751 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15752 return SDValue();
15753
15754 SDLoc DL(Trunc);
15755 if (isVMOVNTruncMask(N->getMask(), VT, false))
15756 return DAG.getNode(
15757 ARMISD::VMOVN, DL, VT,
15758 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15759 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15760 DAG.getConstant(1, DL, MVT::i32));
15761 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15762 return DAG.getNode(
15763 ARMISD::VMOVN, DL, VT,
15764 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15765 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15766 DAG.getConstant(1, DL, MVT::i32));
15767 return SDValue();
15768}
15769
15770/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15771/// ISD::VECTOR_SHUFFLE.
15772 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15773 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15774 return R;
15775
15776 // The LLVM shufflevector instruction does not require the shuffle mask
15777 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15778 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15779 // operands do not match the mask length, they are extended by concatenating
15780 // them with undef vectors. That is probably the right thing for other
15781 // targets, but for NEON it is better to concatenate two double-register
15782 // size vector operands into a single quad-register size vector. Do that
15783 // transformation here:
15784 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15785 // shuffle(concat(v1, v2), undef)
15786 SDValue Op0 = N->getOperand(0);
15787 SDValue Op1 = N->getOperand(1);
15788 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15789 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15790 Op0.getNumOperands() != 2 ||
15791 Op1.getNumOperands() != 2)
15792 return SDValue();
15793 SDValue Concat0Op1 = Op0.getOperand(1);
15794 SDValue Concat1Op1 = Op1.getOperand(1);
15795 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15796 return SDValue();
15797 // Skip the transformation if any of the types are illegal.
15798 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15799 EVT VT = N->getValueType(0);
15800 if (!TLI.isTypeLegal(VT) ||
15801 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15802 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15803 return SDValue();
15804
15805 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15806 Op0.getOperand(0), Op1.getOperand(0));
15807 // Translate the shuffle mask.
15808 SmallVector<int, 16> NewMask;
15809 unsigned NumElts = VT.getVectorNumElements();
15810 unsigned HalfElts = NumElts/2;
15811 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15812 for (unsigned n = 0; n < NumElts; ++n) {
15813 int MaskElt = SVN->getMaskElt(n);
15814 int NewElt = -1;
15815 if (MaskElt < (int)HalfElts)
15816 NewElt = MaskElt;
15817 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15818 NewElt = HalfElts + MaskElt - NumElts;
15819 NewMask.push_back(NewElt);
15820 }
15821 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15822 DAG.getUNDEF(VT), NewMask);
15823}
15824
15825/// Load/store instruction that can be merged with a base address
15826/// update
15827 struct BaseUpdateTarget {
15828 SDNode *N;
15829 bool isIntrinsic;
15830 bool isStore;
15831 unsigned AddrOpIdx;
15832 };
15833
15834 struct BaseUpdateUser {
15835 /// Instruction that updates a pointer
15836 SDNode *N;
15837 /// Pointer increment operand
15838 SDValue Inc;
15839 /// Pointer increment value if it is a constant, or 0 otherwise
15840 unsigned ConstInc;
15841};
15842
15843 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15844 struct BaseUpdateUser &User,
15845 bool SimpleConstIncOnly,
15846 TargetLowering::DAGCombinerInfo &DCI) {
15847 SelectionDAG &DAG = DCI.DAG;
15848 SDNode *N = Target.N;
15849 MemSDNode *MemN = cast<MemSDNode>(N);
15850 SDLoc dl(N);
15851
15852 // Find the new opcode for the updating load/store.
15853 bool isLoadOp = true;
15854 bool isLaneOp = false;
15855 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15856 // as an operand.
15857 bool hasAlignment = true;
15858 unsigned NewOpc = 0;
15859 unsigned NumVecs = 0;
15860 if (Target.isIntrinsic) {
15861 unsigned IntNo = N->getConstantOperandVal(1);
15862 switch (IntNo) {
15863 default:
15864 llvm_unreachable("unexpected intrinsic for Neon base update");
15865 case Intrinsic::arm_neon_vld1:
15866 NewOpc = ARMISD::VLD1_UPD;
15867 NumVecs = 1;
15868 break;
15869 case Intrinsic::arm_neon_vld2:
15870 NewOpc = ARMISD::VLD2_UPD;
15871 NumVecs = 2;
15872 break;
15873 case Intrinsic::arm_neon_vld3:
15874 NewOpc = ARMISD::VLD3_UPD;
15875 NumVecs = 3;
15876 break;
15877 case Intrinsic::arm_neon_vld4:
15878 NewOpc = ARMISD::VLD4_UPD;
15879 NumVecs = 4;
15880 break;
15881 case Intrinsic::arm_neon_vld1x2:
15882 NewOpc = ARMISD::VLD1x2_UPD;
15883 NumVecs = 2;
15884 hasAlignment = false;
15885 break;
15886 case Intrinsic::arm_neon_vld1x3:
15887 NewOpc = ARMISD::VLD1x3_UPD;
15888 NumVecs = 3;
15889 hasAlignment = false;
15890 break;
15891 case Intrinsic::arm_neon_vld1x4:
15892 NewOpc = ARMISD::VLD1x4_UPD;
15893 NumVecs = 4;
15894 hasAlignment = false;
15895 break;
15896 case Intrinsic::arm_neon_vld2dup:
15897 NewOpc = ARMISD::VLD2DUP_UPD;
15898 NumVecs = 2;
15899 break;
15900 case Intrinsic::arm_neon_vld3dup:
15901 NewOpc = ARMISD::VLD3DUP_UPD;
15902 NumVecs = 3;
15903 break;
15904 case Intrinsic::arm_neon_vld4dup:
15905 NewOpc = ARMISD::VLD4DUP_UPD;
15906 NumVecs = 4;
15907 break;
15908 case Intrinsic::arm_neon_vld2lane:
15909 NewOpc = ARMISD::VLD2LN_UPD;
15910 NumVecs = 2;
15911 isLaneOp = true;
15912 break;
15913 case Intrinsic::arm_neon_vld3lane:
15914 NewOpc = ARMISD::VLD3LN_UPD;
15915 NumVecs = 3;
15916 isLaneOp = true;
15917 break;
15918 case Intrinsic::arm_neon_vld4lane:
15919 NewOpc = ARMISD::VLD4LN_UPD;
15920 NumVecs = 4;
15921 isLaneOp = true;
15922 break;
15923 case Intrinsic::arm_neon_vst1:
15924 NewOpc = ARMISD::VST1_UPD;
15925 NumVecs = 1;
15926 isLoadOp = false;
15927 break;
15928 case Intrinsic::arm_neon_vst2:
15929 NewOpc = ARMISD::VST2_UPD;
15930 NumVecs = 2;
15931 isLoadOp = false;
15932 break;
15933 case Intrinsic::arm_neon_vst3:
15934 NewOpc = ARMISD::VST3_UPD;
15935 NumVecs = 3;
15936 isLoadOp = false;
15937 break;
15938 case Intrinsic::arm_neon_vst4:
15939 NewOpc = ARMISD::VST4_UPD;
15940 NumVecs = 4;
15941 isLoadOp = false;
15942 break;
15943 case Intrinsic::arm_neon_vst2lane:
15944 NewOpc = ARMISD::VST2LN_UPD;
15945 NumVecs = 2;
15946 isLoadOp = false;
15947 isLaneOp = true;
15948 break;
15949 case Intrinsic::arm_neon_vst3lane:
15950 NewOpc = ARMISD::VST3LN_UPD;
15951 NumVecs = 3;
15952 isLoadOp = false;
15953 isLaneOp = true;
15954 break;
15955 case Intrinsic::arm_neon_vst4lane:
15956 NewOpc = ARMISD::VST4LN_UPD;
15957 NumVecs = 4;
15958 isLoadOp = false;
15959 isLaneOp = true;
15960 break;
15961 case Intrinsic::arm_neon_vst1x2:
15962 NewOpc = ARMISD::VST1x2_UPD;
15963 NumVecs = 2;
15964 isLoadOp = false;
15965 hasAlignment = false;
15966 break;
15967 case Intrinsic::arm_neon_vst1x3:
15968 NewOpc = ARMISD::VST1x3_UPD;
15969 NumVecs = 3;
15970 isLoadOp = false;
15971 hasAlignment = false;
15972 break;
15973 case Intrinsic::arm_neon_vst1x4:
15974 NewOpc = ARMISD::VST1x4_UPD;
15975 NumVecs = 4;
15976 isLoadOp = false;
15977 hasAlignment = false;
15978 break;
15979 }
15980 } else {
15981 isLaneOp = true;
15982 switch (N->getOpcode()) {
15983 default:
15984 llvm_unreachable("unexpected opcode for Neon base update");
15985 case ARMISD::VLD1DUP:
15986 NewOpc = ARMISD::VLD1DUP_UPD;
15987 NumVecs = 1;
15988 break;
15989 case ARMISD::VLD2DUP:
15990 NewOpc = ARMISD::VLD2DUP_UPD;
15991 NumVecs = 2;
15992 break;
15993 case ARMISD::VLD3DUP:
15994 NewOpc = ARMISD::VLD3DUP_UPD;
15995 NumVecs = 3;
15996 break;
15997 case ARMISD::VLD4DUP:
15998 NewOpc = ARMISD::VLD4DUP_UPD;
15999 NumVecs = 4;
16000 break;
16001 case ISD::LOAD:
16002 NewOpc = ARMISD::VLD1_UPD;
16003 NumVecs = 1;
16004 isLaneOp = false;
16005 break;
16006 case ISD::STORE:
16007 NewOpc = ARMISD::VST1_UPD;
16008 NumVecs = 1;
16009 isLaneOp = false;
16010 isLoadOp = false;
16011 break;
16012 }
16013 }
16014
16015 // Find the size of memory referenced by the load/store.
16016 EVT VecTy;
16017 if (isLoadOp) {
16018 VecTy = N->getValueType(0);
16019 } else if (Target.isIntrinsic) {
16020 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
16021 } else {
16022 assert(Target.isStore &&
16023 "Node has to be a load, a store, or an intrinsic!");
16024 VecTy = N->getOperand(1).getValueType();
16025 }
16026
16027 bool isVLDDUPOp =
16028 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
16029 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
16030
16031 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16032 if (isLaneOp || isVLDDUPOp)
16033 NumBytes /= VecTy.getVectorNumElements();
16034
16035 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
16036 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
16037 // separate instructions that make it harder to use a non-constant update.
16038 return false;
16039 }
16040
16041 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
16042 return false;
16043
16044 // OK, we found an ADD we can fold into the base update.
16045 // Now, create a _UPD node, taking care of not breaking alignment.
16046
16047 EVT AlignedVecTy = VecTy;
16048 Align Alignment = MemN->getAlign();
16049
16050 // If this is a less-than-standard-aligned load/store, change the type to
16051 // match the standard alignment.
16052 // The alignment is overlooked when selecting _UPD variants; and it's
16053 // easier to introduce bitcasts here than fix that.
16054 // There are 3 ways to get to this base-update combine:
16055 // - intrinsics: they are assumed to be properly aligned (to the standard
16056 // alignment of the memory type), so we don't need to do anything.
16057 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16058 // intrinsics, so, likewise, there's nothing to do.
16059 // - generic load/store instructions: the alignment is specified as an
16060 // explicit operand, rather than implicitly as the standard alignment
16061 // of the memory type (like the intrinsics). We need to change the
16062 // memory type to match the explicit alignment. That way, we don't
16063 // generate non-standard-aligned ARMISD::VLDx nodes.
16064 if (isa<LSBaseSDNode>(N)) {
16065 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16066 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16067 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16068 assert(!isLaneOp && "Unexpected generic load/store lane.");
16069 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16070 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16071 }
16072 // Don't set an explicit alignment on regular load/stores that we want
16073 // to transform to VLD/VST 1_UPD nodes.
16074 // This matches the behavior of regular load/stores, which only get an
16075 // explicit alignment if the MMO alignment is larger than the standard
16076 // alignment of the memory type.
16077 // Intrinsics, however, always get an explicit alignment, set to the
16078 // alignment of the MMO.
16079 Alignment = Align(1);
16080 }
16081
16082 // Create the new updating load/store node.
16083 // First, create an SDVTList for the new updating node's results.
16084 EVT Tys[6];
16085 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16086 unsigned n;
16087 for (n = 0; n < NumResultVecs; ++n)
16088 Tys[n] = AlignedVecTy;
16089 Tys[n++] = MVT::i32;
16090 Tys[n] = MVT::Other;
16091 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16092
16093 // Then, gather the new node's operands.
16094 SmallVector<SDValue, 8> Ops;
16095 Ops.push_back(N->getOperand(0)); // incoming chain
16096 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16097 Ops.push_back(User.Inc);
16098
16099 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16100 // Try to match the intrinsic's signature
16101 Ops.push_back(StN->getValue());
16102 } else {
16103 // Loads (and of course intrinsics) match the intrinsics' signature,
16104 // so just add all but the alignment operand.
16105 unsigned LastOperand =
16106 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16107 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16108 Ops.push_back(N->getOperand(i));
16109 }
16110
16111 // For all node types, the alignment operand is always the last one.
16112 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16113
16114 // If this is a non-standard-aligned STORE, the penultimate operand is the
16115 // stored value. Bitcast it to the aligned type.
16116 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16117 SDValue &StVal = Ops[Ops.size() - 2];
16118 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16119 }
16120
16121 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16122 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16123 MemN->getMemOperand());
16124
16125 // Update the uses.
16126 SmallVector<SDValue, 5> NewResults;
16127 for (unsigned i = 0; i < NumResultVecs; ++i)
16128 NewResults.push_back(SDValue(UpdN.getNode(), i));
16129
16130 // If this is a non-standard-aligned LOAD, the first result is the loaded
16131 // value. Bitcast it to the expected result type.
16132 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16133 SDValue &LdVal = NewResults[0];
16134 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16135 }
16136
16137 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16138 DCI.CombineTo(N, NewResults);
16139 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16140
16141 return true;
16142}
16143
16144 // If (opcode ptr inc) is an ADD-like instruction, return the
16145// increment value. Otherwise return 0.
16146static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16147 SDValue Inc, const SelectionDAG &DAG) {
16148 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16149 if (!CInc)
16150 return 0;
16151
16152 switch (Opcode) {
16153 case ARMISD::VLD1_UPD:
16154 case ISD::ADD:
16155 return CInc->getZExtValue();
16156 case ISD::OR: {
16157 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16158 // (OR ptr inc) is the same as (ADD ptr inc)
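// For example (illustrative): if Ptr is known to be 16-byte aligned and
// Inc == 4, the two values share no set bits, so the OR cannot carry and
// behaves exactly like an ADD.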
16159 return CInc->getZExtValue();
16160 }
16161 return 0;
16162 }
16163 default:
16164 return 0;
16165 }
16166}
16167
16168 static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16169 switch (N->getOpcode()) {
16170 case ISD::ADD:
16171 case ISD::OR: {
16172 if (isa<ConstantSDNode>(N->getOperand(1))) {
16173 *Ptr = N->getOperand(0);
16174 *CInc = N->getOperand(1);
16175 return true;
16176 }
16177 return false;
16178 }
16179 case ARMISD::VLD1_UPD: {
16180 if (isa<ConstantSDNode>(N->getOperand(2))) {
16181 *Ptr = N->getOperand(1);
16182 *CInc = N->getOperand(2);
16183 return true;
16184 }
16185 return false;
16186 }
16187 default:
16188 return false;
16189 }
16190}
16191
16192 static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16193 // Check that the add is independent of the load/store.
16194 // Otherwise, folding it would create a cycle. Search through Addr
16195 // as well, since the User may not be a direct user of Addr and
16196 // only share a base pointer.
16197 SmallPtrSet<const SDNode *, 32> Visited;
16198 SmallVector<const SDNode *, 16> Worklist;
16199 Worklist.push_back(N);
16200 Worklist.push_back(User);
16201 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16202 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16203 return false;
16204 return true;
16205}
16206
16207/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16208/// NEON load/store intrinsics, and generic vector load/stores, to merge
16209/// base address updates.
16210/// For generic load/stores, the memory type is assumed to be a vector.
16211/// The caller is assumed to have checked legality.
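/// For example (illustrative), a vld1 whose address is later advanced by the
/// access size can be rewritten as the post-incrementing VLD1_UPD form, whose
/// extra result provides the updated pointer to the user of the add.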
16212 static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16213 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16215 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16216 const bool isStore = N->getOpcode() == ISD::STORE;
16217 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16218 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16219
16220 SDValue Addr = N->getOperand(AddrOpIdx);
16221
16222 SmallVector<BaseUpdateUser, 8> BaseUpdates;
16223
16224 // Search for a use of the address operand that is an increment.
16225 for (SDUse &Use : Addr->uses()) {
16226 SDNode *User = Use.getUser();
16227 if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)
16228 continue;
16229
16230 SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);
16231 unsigned ConstInc =
16232 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16233
16234 if (ConstInc || User->getOpcode() == ISD::ADD)
16235 BaseUpdates.push_back({User, Inc, ConstInc});
16236 }
16237
16238 // If the address is a constant pointer increment itself, find
16239 // another constant increment that has the same base operand
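// For example (illustrative): if Addr is (add Base, 16) and another user
// computes (add Base, 32), that user acts as an increment of 32 - 16 = 16
// relative to Addr and can still be considered as a base update.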
16240 SDValue Base;
16241 SDValue CInc;
16242 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16243 unsigned Offset =
16244 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16245 for (SDUse &Use : Base->uses()) {
16246
16247 SDNode *User = Use.getUser();
16248 if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||
16249 User->getNumOperands() != 2)
16250 continue;
16251
16252 SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);
16253 unsigned UserOffset =
16254 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16255
16256 if (!UserOffset || UserOffset <= Offset)
16257 continue;
16258
16259 unsigned NewConstInc = UserOffset - Offset;
16260 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16261 BaseUpdates.push_back({User, NewInc, NewConstInc});
16262 }
16263 }
16264
16265 // Try to fold the load/store with an update that matches memory
16266 // access size. This should work well for sequential loads.
16267 //
16268 // Filter out invalid updates as well.
16269 unsigned NumValidUpd = BaseUpdates.size();
16270 for (unsigned I = 0; I < NumValidUpd;) {
16271 BaseUpdateUser &User = BaseUpdates[I];
16272 if (!isValidBaseUpdate(N, User.N)) {
16273 --NumValidUpd;
16274 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16275 continue;
16276 }
16277
16278 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16279 return SDValue();
16280 ++I;
16281 }
16282 BaseUpdates.resize(NumValidUpd);
16283
16284 // Try to fold with other users. Non-constant updates are considered
16285 // first, and constant updates are sorted to not break a sequence of
16286 // strided accesses (if there is any).
16287 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16288 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16289 return LHS.ConstInc < RHS.ConstInc;
16290 });
16291 for (BaseUpdateUser &User : BaseUpdates) {
16292 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16293 return SDValue();
16294 }
16295 return SDValue();
16296}
16297
16298 static SDValue PerformVLDCombine(SDNode *N,
16299 TargetLowering::DAGCombinerInfo &DCI) {
16300 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16301 return SDValue();
16302
16303 return CombineBaseUpdate(N, DCI);
16304}
16305
16306 static SDValue PerformMVEVLDCombine(SDNode *N,
16307 TargetLowering::DAGCombinerInfo &DCI) {
16308 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16309 return SDValue();
16310
16311 SelectionDAG &DAG = DCI.DAG;
16312 SDValue Addr = N->getOperand(2);
16313 MemSDNode *MemN = cast<MemSDNode>(N);
16314 SDLoc dl(N);
16315
16316 // For the stores, where there are multiple intrinsics we only actually want
16317 // to post-inc the last of them.
16318 unsigned IntNo = N->getConstantOperandVal(1);
16319 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16320 return SDValue();
16321 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16322 return SDValue();
16323
16324 // Search for a use of the address operand that is an increment.
16325 for (SDUse &Use : Addr->uses()) {
16326 SDNode *User = Use.getUser();
16327 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
16328 continue;
16329
16330 // Check that the add is independent of the load/store. Otherwise, folding
16331 // it would create a cycle. We can avoid searching through Addr as it's a
16332 // predecessor to both.
16333 SmallPtrSet<const SDNode *, 32> Visited;
16334 SmallVector<const SDNode *, 16> Worklist;
16335 Visited.insert(Addr.getNode());
16336 Worklist.push_back(N);
16337 Worklist.push_back(User);
16338 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16339 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16340 continue;
16341
16342 // Find the new opcode for the updating load/store.
16343 bool isLoadOp = true;
16344 unsigned NewOpc = 0;
16345 unsigned NumVecs = 0;
16346 switch (IntNo) {
16347 default:
16348 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16349 case Intrinsic::arm_mve_vld2q:
16350 NewOpc = ARMISD::VLD2_UPD;
16351 NumVecs = 2;
16352 break;
16353 case Intrinsic::arm_mve_vld4q:
16354 NewOpc = ARMISD::VLD4_UPD;
16355 NumVecs = 4;
16356 break;
16357 case Intrinsic::arm_mve_vst2q:
16358 NewOpc = ARMISD::VST2_UPD;
16359 NumVecs = 2;
16360 isLoadOp = false;
16361 break;
16362 case Intrinsic::arm_mve_vst4q:
16363 NewOpc = ARMISD::VST4_UPD;
16364 NumVecs = 4;
16365 isLoadOp = false;
16366 break;
16367 }
16368
16369 // Find the size of memory referenced by the load/store.
16370 EVT VecTy;
16371 if (isLoadOp) {
16372 VecTy = N->getValueType(0);
16373 } else {
16374 VecTy = N->getOperand(3).getValueType();
16375 }
16376
16377 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16378
16379 // If the increment is a constant, it must match the memory ref size.
16380 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16381 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16382 if (!CInc || CInc->getZExtValue() != NumBytes)
16383 continue;
16384
16385 // Create the new updating load/store node.
16386 // First, create an SDVTList for the new updating node's results.
16387 EVT Tys[6];
16388 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16389 unsigned n;
16390 for (n = 0; n < NumResultVecs; ++n)
16391 Tys[n] = VecTy;
16392 Tys[n++] = MVT::i32;
16393 Tys[n] = MVT::Other;
16394 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16395
16396 // Then, gather the new node's operands.
16397 SmallVector<SDValue, 8> Ops;
16398 Ops.push_back(N->getOperand(0)); // incoming chain
16399 Ops.push_back(N->getOperand(2)); // ptr
16400 Ops.push_back(Inc);
16401
16402 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16403 Ops.push_back(N->getOperand(i));
16404
16405 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16406 MemN->getMemOperand());
16407
16408 // Update the uses.
16409 SmallVector<SDValue, 5> NewResults;
16410 for (unsigned i = 0; i < NumResultVecs; ++i)
16411 NewResults.push_back(SDValue(UpdN.getNode(), i));
16412
16413 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16414 DCI.CombineTo(N, NewResults);
16415 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16416
16417 break;
16418 }
16419
16420 return SDValue();
16421}
16422
16423/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16424/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16425/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16426/// return true.
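/// For example (roughly): if a vld2lane result feeds VDUPLANEs that all
/// duplicate the same lane that was loaded, the whole group can be replaced
/// by a single VLD2DUP that loads and duplicates directly from memory.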
16427 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16428 SelectionDAG &DAG = DCI.DAG;
16429 EVT VT = N->getValueType(0);
16430 // vldN-dup instructions only support 64-bit vectors for N > 1.
16431 if (!VT.is64BitVector())
16432 return false;
16433
16434 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16435 SDNode *VLD = N->getOperand(0).getNode();
16436 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16437 return false;
16438 unsigned NumVecs = 0;
16439 unsigned NewOpc = 0;
16440 unsigned IntNo = VLD->getConstantOperandVal(1);
16441 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16442 NumVecs = 2;
16443 NewOpc = ARMISD::VLD2DUP;
16444 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16445 NumVecs = 3;
16446 NewOpc = ARMISD::VLD3DUP;
16447 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16448 NumVecs = 4;
16449 NewOpc = ARMISD::VLD4DUP;
16450 } else {
16451 return false;
16452 }
16453
16454 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16455 // numbers match the load.
16456 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16457 for (SDUse &Use : VLD->uses()) {
16458 // Ignore uses of the chain result.
16459 if (Use.getResNo() == NumVecs)
16460 continue;
16461 SDNode *User = Use.getUser();
16462 if (User->getOpcode() != ARMISD::VDUPLANE ||
16463 VLDLaneNo != User->getConstantOperandVal(1))
16464 return false;
16465 }
16466
16467 // Create the vldN-dup node.
16468 EVT Tys[5];
16469 unsigned n;
16470 for (n = 0; n < NumVecs; ++n)
16471 Tys[n] = VT;
16472 Tys[n] = MVT::Other;
16473 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16474 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16475 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16476 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16477 Ops, VLDMemInt->getMemoryVT(),
16478 VLDMemInt->getMemOperand());
16479
16480 // Update the uses.
16481 for (SDUse &Use : VLD->uses()) {
16482 unsigned ResNo = Use.getResNo();
16483 // Ignore uses of the chain result.
16484 if (ResNo == NumVecs)
16485 continue;
16486 DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));
16487 }
16488
16489 // Now the vldN-lane intrinsic is dead except for its chain result.
16490 // Update uses of the chain.
16491 std::vector<SDValue> VLDDupResults;
16492 for (unsigned n = 0; n < NumVecs; ++n)
16493 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16494 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16495 DCI.CombineTo(VLD, VLDDupResults);
16496
16497 return true;
16498}
16499
16500/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16501/// ARMISD::VDUPLANE.
16502 static SDValue PerformVDUPLANECombine(SDNode *N,
16503 TargetLowering::DAGCombinerInfo &DCI,
16504 const ARMSubtarget *Subtarget) {
16505 SDValue Op = N->getOperand(0);
16506 EVT VT = N->getValueType(0);
16507
16508 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16509 if (Subtarget->hasMVEIntegerOps()) {
16510 EVT ExtractVT = VT.getVectorElementType();
16511 // We need to ensure we are creating a legal type.
16512 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16513 ExtractVT = MVT::i32;
16514 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16515 N->getOperand(0), N->getOperand(1));
16516 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16517 }
16518
16519 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16520 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16521 if (CombineVLDDUP(N, DCI))
16522 return SDValue(N, 0);
16523
16524 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16525 // redundant. Ignore bit_converts for now; element sizes are checked below.
16526 while (Op.getOpcode() == ISD::BITCAST)
16527 Op = Op.getOperand(0);
16528 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16529 return SDValue();
16530
16531 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16532 unsigned EltSize = Op.getScalarValueSizeInBits();
16533 // The canonical VMOV for a zero vector uses a 32-bit element size.
16534 unsigned Imm = Op.getConstantOperandVal(0);
16535 unsigned EltBits;
16536 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16537 EltSize = 8;
16538 if (EltSize > VT.getScalarSizeInBits())
16539 return SDValue();
16540
16541 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16542}
16543
16544/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16545 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16546 const ARMSubtarget *Subtarget) {
16547 SDValue Op = N->getOperand(0);
16548 SDLoc dl(N);
16549
16550 if (Subtarget->hasMVEIntegerOps()) {
16551 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16552 // need to come from a GPR.
16553 if (Op.getValueType() == MVT::f32)
16554 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16555 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16556 else if (Op.getValueType() == MVT::f16)
16557 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16558 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16559 }
16560
16561 if (!Subtarget->hasNEON())
16562 return SDValue();
16563
16564 // Match VDUP(LOAD) -> VLD1DUP.
16565 // We match this pattern here rather than waiting for isel because the
16566 // transform is only legal for unindexed loads.
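// Illustrative example (assuming a simple unindexed i16 load feeding the
// VDUP): vdup(load i16, %p) becomes a single VLD1DUP node that loads the
// scalar and replicates it across all vector lanes.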
16567 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16568 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16569 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16570 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16571 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16572 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16573 SDValue VLDDup =
16574 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16575 LD->getMemoryVT(), LD->getMemOperand());
16576 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16577 return VLDDup;
16578 }
16579
16580 return SDValue();
16581}
16582
16583 static SDValue PerformLOADCombine(SDNode *N,
16584 TargetLowering::DAGCombinerInfo &DCI,
16585 const ARMSubtarget *Subtarget) {
16586 EVT VT = N->getValueType(0);
16587
16588 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16589 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16590 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16591 return CombineBaseUpdate(N, DCI);
16592
16593 return SDValue();
16594}
16595
16596// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16597// pack all of the elements in one place. Next, store to memory in fewer
16598// chunks.
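// For example (an illustrative case, assuming all types involved are legal):
// a truncating store of v4i32 to v4i16 is rewritten as a shuffle that packs
// the four i16 lanes into the bottom of the register, followed by a single
// wider integer store of the packed data.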
16599 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16600 SelectionDAG &DAG) {
16601 SDValue StVal = St->getValue();
16602 EVT VT = StVal.getValueType();
16603 if (!St->isTruncatingStore() || !VT.isVector())
16604 return SDValue();
16605 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16606 EVT StVT = St->getMemoryVT();
16607 unsigned NumElems = VT.getVectorNumElements();
16608 assert(StVT != VT && "Cannot truncate to the same type");
16609 unsigned FromEltSz = VT.getScalarSizeInBits();
16610 unsigned ToEltSz = StVT.getScalarSizeInBits();
16611
16612 // From, To sizes and ElemCount must be pow of two
16613 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16614 return SDValue();
16615
16616 // We are going to use the original vector elt for storing.
16617 // Accumulated smaller vector elements must be a multiple of the store size.
16618 if (0 != (NumElems * FromEltSz) % ToEltSz)
16619 return SDValue();
16620
16621 unsigned SizeRatio = FromEltSz / ToEltSz;
16622 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16623
16624 // Create a type on which we perform the shuffle.
16625 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16626 NumElems * SizeRatio);
16627 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16628
16629 SDLoc DL(St);
16630 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16631 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16632 for (unsigned i = 0; i < NumElems; ++i)
16633 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16634 : i * SizeRatio;
16635
16636 // Can't shuffle using an illegal type.
16637 if (!TLI.isTypeLegal(WideVecVT))
16638 return SDValue();
16639
16640 SDValue Shuff = DAG.getVectorShuffle(
16641 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16642 // At this point all of the data is stored at the bottom of the
16643 // register. We now need to save it to mem.
16644
16645 // Find the largest store unit
16646 MVT StoreType = MVT::i8;
16647 for (MVT Tp : MVT::integer_valuetypes()) {
16648 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16649 StoreType = Tp;
16650 }
16651 // Didn't find a legal store type.
16652 if (!TLI.isTypeLegal(StoreType))
16653 return SDValue();
16654
16655 // Bitcast the original vector into a vector of store-size units
16656 EVT StoreVecVT =
16657 EVT::getVectorVT(*DAG.getContext(), StoreType,
16658 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16659 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16660 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16661 SmallVector<SDValue, 8> Chains;
16662 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16663 TLI.getPointerTy(DAG.getDataLayout()));
16664 SDValue BasePtr = St->getBasePtr();
16665
16666 // Perform one or more big stores into memory.
16667 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16668 for (unsigned I = 0; I < E; I++) {
16669 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16670 ShuffWide, DAG.getIntPtrConstant(I, DL));
16671 SDValue Ch =
16672 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16673 St->getAlign(), St->getMemOperand()->getFlags());
16674 BasePtr =
16675 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16676 Chains.push_back(Ch);
16677 }
16678 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16679}
16680
16681// Try taking a single vector store from an fpround (which would otherwise turn
16682// into an expensive buildvector) and splitting it into a series of narrowing
16683// stores.
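// Sketch of the idea (illustrative types): store(fpround(v8f32 x) to v8f16)
// becomes two VCVTN conversions of the v4f32 halves, each written out with a
// narrowing (truncating) store, instead of building the whole v8f16 vector
// first.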
16684 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16685 SelectionDAG &DAG) {
16686 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16687 return SDValue();
16688 SDValue Trunc = St->getValue();
16689 if (Trunc->getOpcode() != ISD::FP_ROUND)
16690 return SDValue();
16691 EVT FromVT = Trunc->getOperand(0).getValueType();
16692 EVT ToVT = Trunc.getValueType();
16693 if (!ToVT.isVector())
16694 return SDValue();
16695 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
16696 EVT ToEltVT = ToVT.getVectorElementType();
16697 EVT FromEltVT = FromVT.getVectorElementType();
16698
16699 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16700 return SDValue();
16701
16702 unsigned NumElements = 4;
16703 if (FromVT.getVectorNumElements() % NumElements != 0)
16704 return SDValue();
16705
16706 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16707 // use the VMOVN over splitting the store. We are looking for patterns of:
16708 // !rev: 0 N 1 N+1 2 N+2 ...
16709 // rev: N 0 N+1 1 N+2 2 ...
16710 // The shuffle may either be a single source (in which case N = NumElts/2) or
16711 // two inputs extended with concat to the same size (in which case N =
16712 // NumElts).
16713 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16714 ArrayRef<int> M = SVN->getMask();
16715 unsigned NumElts = ToVT.getVectorNumElements();
16716 if (SVN->getOperand(1).isUndef())
16717 NumElts /= 2;
16718
16719 unsigned Off0 = Rev ? NumElts : 0;
16720 unsigned Off1 = Rev ? 0 : NumElts;
16721
16722 for (unsigned I = 0; I < NumElts; I += 2) {
16723 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16724 return false;
16725 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16726 return false;
16727 }
16728
16729 return true;
16730 };
16731
16732 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16733 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16734 return SDValue();
16735
16736 LLVMContext &C = *DAG.getContext();
16737 SDLoc DL(St);
16738 // Details about the old store
16739 SDValue Ch = St->getChain();
16740 SDValue BasePtr = St->getBasePtr();
16741 Align Alignment = St->getOriginalAlign();
16742 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16743 AAMDNodes AAInfo = St->getAAInfo();
16744
16745 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16746 // and then stored as truncating integer stores.
16747 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16748 EVT NewToVT = EVT::getVectorVT(
16749 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16750
16751 SmallVector<SDValue, 4> Stores;
16752 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16753 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16754 SDValue NewPtr =
16755 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16756
16757 SDValue Extract =
16758 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16759 DAG.getConstant(i * NumElements, DL, MVT::i32));
16760
16761 SDValue FPTrunc =
16762 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16763 Extract, DAG.getConstant(0, DL, MVT::i32));
16764 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16765
16766 SDValue Store = DAG.getTruncStore(
16767 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16768 NewToVT, Alignment, MMOFlags, AAInfo);
16769 Stores.push_back(Store);
16770 }
16771 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16772}
16773
16774// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16775// into an expensive buildvector) and splitting it into a series of narrowing
16776// stores.
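// Illustrative sketch: store(MVETRUNC(v4i32 a, v4i32 b) to v8i16) is emitted
// as two truncating stores of a and b at consecutive offsets, avoiding the
// need to materialise the narrowed vector in a register.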
16777 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16778 SelectionDAG &DAG) {
16779 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16780 return SDValue();
16781 SDValue Trunc = St->getValue();
16782 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16783 return SDValue();
16784 EVT FromVT = Trunc->getOperand(0).getValueType();
16785 EVT ToVT = Trunc.getValueType();
16786
16787 LLVMContext &C = *DAG.getContext();
16788 SDLoc DL(St);
16789 // Details about the old store
16790 SDValue Ch = St->getChain();
16791 SDValue BasePtr = St->getBasePtr();
16792 Align Alignment = St->getOriginalAlign();
16793 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16794 AAMDNodes AAInfo = St->getAAInfo();
16795
16796 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16797 FromVT.getVectorNumElements());
16798
16799 SmallVector<SDValue, 4> Stores;
16800 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16801 unsigned NewOffset =
16802 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16803 SDValue NewPtr =
16804 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16805
16806 SDValue Extract = Trunc.getOperand(i);
16807 SDValue Store = DAG.getTruncStore(
16808 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16809 NewToVT, Alignment, MMOFlags, AAInfo);
16810 Stores.push_back(Store);
16811 }
16812 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16813}
16814
16815// Given a floating point store from an extracted vector, with an integer
16816// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16817// help reduce fp register pressure, doesn't require the fp extract and allows
16818// use of more integer post-inc stores not available with vstr.
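// Rough example: for store(f16 extractelt(%q, lane)), if an i32 VGETLANEu of
// the same vector and lane already exists in the DAG, the f16 store is
// replaced by a truncating i32->i16 store of that existing integer value.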
16819 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16820 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16821 return SDValue();
16822 SDValue Extract = St->getValue();
16823 EVT VT = Extract.getValueType();
16824 // For now this only handles f16. It may be useful for f32 too, but that will
16825 // be bitcast(extract), not the VGETLANEu we currently check here.
16826 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16827 return SDValue();
16828
16829 SDNode *GetLane =
16830 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16831 {Extract.getOperand(0), Extract.getOperand(1)});
16832 if (!GetLane)
16833 return SDValue();
16834
16835 LLVMContext &C = *DAG.getContext();
16836 SDLoc DL(St);
16837 // Create a new integer store to replace the existing floating point version.
16838 SDValue Ch = St->getChain();
16839 SDValue BasePtr = St->getBasePtr();
16840 Align Alignment = St->getOriginalAlign();
16841 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16842 AAMDNodes AAInfo = St->getAAInfo();
16843 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16844 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16845 St->getPointerInfo(), NewToVT, Alignment,
16846 MMOFlags, AAInfo);
16847
16848 return Store;
16849}
16850
16851/// PerformSTORECombine - Target-specific dag combine xforms for
16852/// ISD::STORE.
16853 static SDValue PerformSTORECombine(SDNode *N,
16854 TargetLowering::DAGCombinerInfo &DCI,
16855 const ARMSubtarget *Subtarget) {
16856 StoreSDNode *St = cast<StoreSDNode>(N);
16857 if (St->isVolatile())
16858 return SDValue();
16859 SDValue StVal = St->getValue();
16860 EVT VT = StVal.getValueType();
16861
16862 if (Subtarget->hasNEON())
16863 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16864 return Store;
16865
16866 if (Subtarget->hasMVEFloatOps())
16867 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16868 return NewToken;
16869
16870 if (Subtarget->hasMVEIntegerOps()) {
16871 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16872 return NewChain;
16873 if (SDValue NewToken =
16874 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16875 return NewToken;
16876 }
16877
16878 if (!ISD::isNormalStore(St))
16879 return SDValue();
16880
16881 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16882 // ARM stores of arguments in the same cache line.
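  // Illustrative shape of the transform: store(VMOVDRR(lo, hi), %p) becomes
  // store(lo, %p) followed by store(hi, %p + 4), with the operand order
  // swapped on big-endian targets (as the code below selects).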
16883 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16884 StVal.getNode()->hasOneUse()) {
16885 SelectionDAG &DAG = DCI.DAG;
16886 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16887 SDLoc DL(St);
16888 SDValue BasePtr = St->getBasePtr();
16889 SDValue NewST1 = DAG.getStore(
16890 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16891 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16892 St->getMemOperand()->getFlags());
16893
16894 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16895 DAG.getConstant(4, DL, MVT::i32));
16896 return DAG.getStore(NewST1.getValue(0), DL,
16897 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16898 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16899 St->getOriginalAlign(),
16900 St->getMemOperand()->getFlags());
16901 }
16902
16903 if (StVal.getValueType() == MVT::i64 &&
16904 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16905
16906 // Bitcast an i64 store extracted from a vector to f64.
16907 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16908 SelectionDAG &DAG = DCI.DAG;
16909 SDLoc dl(StVal);
16910 SDValue IntVec = StVal.getOperand(0);
16911 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16912 IntVec.getValueType().getVectorNumElements()/2);
16913 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16914 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16915 Vec, StVal.getOperand(1));
16916 dl = SDLoc(N);
16917 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16918 // Make the DAGCombiner fold the bitcasts.
16919 DCI.AddToWorklist(Vec.getNode());
16920 DCI.AddToWorklist(ExtElt.getNode());
16921 DCI.AddToWorklist(V.getNode());
16922 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16923 St->getPointerInfo(), St->getAlign(),
16924 St->getMemOperand()->getFlags(), St->getAAInfo());
16925 }
16926
16927 // If this is a legal vector store, try to combine it into a VST1_UPD.
16928 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16929 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16930 return CombineBaseUpdate(N, DCI);
16931
16932 return SDValue();
16933}
16934
16935/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16936/// can replace combinations of VMUL and VCVT (floating-point to integer)
16937/// when the VMUL has a constant operand that is a power of 2.
16938///
16939/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16940/// vmul.f32 d16, d17, d16
16941/// vcvt.s32.f32 d16, d16
16942/// becomes:
16943/// vcvt.s32.f32 d16, d16, #3
16944 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16945 const ARMSubtarget *Subtarget) {
16946 if (!Subtarget->hasNEON())
16947 return SDValue();
16948
16949 SDValue Op = N->getOperand(0);
16950 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16951 Op.getOpcode() != ISD::FMUL)
16952 return SDValue();
16953
16954 SDValue ConstVec = Op->getOperand(1);
16955 if (!isa<BuildVectorSDNode>(ConstVec))
16956 return SDValue();
16957
16958 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16959 uint32_t FloatBits = FloatTy.getSizeInBits();
16960 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16961 uint32_t IntBits = IntTy.getSizeInBits();
16962 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16963 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16964 // These instructions only exist converting from f32 to i32. We can handle
16965 // smaller integers by generating an extra truncate, but larger ones would
16966 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16967 // these instructions only support v2i32/v4i32 types.
16968 return SDValue();
16969 }
16970
16971 BitVector UndefElements;
16972 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16973 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16974 if (C == -1 || C == 0 || C > 32)
16975 return SDValue();
16976
16977 SDLoc dl(N);
16978 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16979 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16980 Intrinsic::arm_neon_vcvtfp2fxu;
16981 SDValue FixConv = DAG.getNode(
16982 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16983 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16984 DAG.getConstant(C, dl, MVT::i32));
16985
16986 if (IntBits < FloatBits)
16987 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16988
16989 return FixConv;
16990}
16991
16992 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16993 const ARMSubtarget *Subtarget) {
16994 if (!Subtarget->hasMVEFloatOps())
16995 return SDValue();
16996
16997 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16998 // The second form can be more easily turned into a predicated vadd, and
16999 // possibly combined into a fma to become a predicated vfma.
17000 SDValue Op0 = N->getOperand(0);
17001 SDValue Op1 = N->getOperand(1);
17002 EVT VT = N->getValueType(0);
17003 SDLoc DL(N);
17004
17005 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
17006 // which these VMOV's represent.
17007 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
17008 if (Op.getOpcode() != ISD::BITCAST ||
17009 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
17010 return false;
17011 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
17012 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
17013 return true;
17014 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
17015 return true;
17016 return false;
17017 };
17018
17019 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
17020 std::swap(Op0, Op1);
17021
17022 if (Op1.getOpcode() != ISD::VSELECT)
17023 return SDValue();
17024
17025 SDNodeFlags FaddFlags = N->getFlags();
17026 bool NSZ = FaddFlags.hasNoSignedZeros();
17027 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
17028 return SDValue();
17029
17030 SDValue FAdd =
17031 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
17032 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
17033}
17034
17035 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
17036 SDValue LHS = N->getOperand(0);
17037 SDValue RHS = N->getOperand(1);
17038 EVT VT = N->getValueType(0);
17039 SDLoc DL(N);
17040
17041 if (!N->getFlags().hasAllowReassociation())
17042 return SDValue();
17043
17044 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17045 auto ReassocComplex = [&](SDValue A, SDValue B) {
17046 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17047 return SDValue();
17048 unsigned Opc = A.getConstantOperandVal(0);
17049 if (Opc != Intrinsic::arm_mve_vcmlaq)
17050 return SDValue();
17051 SDValue VCMLA = DAG.getNode(
17052 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17053 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17054 A.getOperand(3), A.getOperand(4));
17055 VCMLA->setFlags(A->getFlags());
17056 return VCMLA;
17057 };
17058 if (SDValue R = ReassocComplex(LHS, RHS))
17059 return R;
17060 if (SDValue R = ReassocComplex(RHS, LHS))
17061 return R;
17062
17063 return SDValue();
17064}
17065
17066 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17067 const ARMSubtarget *Subtarget) {
17068 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17069 return S;
17070 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17071 return S;
17072 return SDValue();
17073}
17074
17075/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17076/// can replace combinations of VCVT (integer to floating-point) and VMUL
17077/// when the VMUL has a constant operand that is a power of 2.
17078///
17079/// Example (assume d17 = <float 0.125, float 0.125>):
17080/// vcvt.f32.s32 d16, d16
17081/// vmul.f32 d16, d16, d17
17082/// becomes:
17083/// vcvt.f32.s32 d16, d16, #3
17084 static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,
17085 const ARMSubtarget *Subtarget) {
17086 if (!Subtarget->hasNEON())
17087 return SDValue();
17088
17089 SDValue Op = N->getOperand(0);
17090 unsigned OpOpcode = Op.getNode()->getOpcode();
17091 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17092 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17093 return SDValue();
17094
17095 SDValue ConstVec = N->getOperand(1);
17096 if (!isa<BuildVectorSDNode>(ConstVec))
17097 return SDValue();
17098
17099 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17100 uint32_t FloatBits = FloatTy.getSizeInBits();
17101 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17102 uint32_t IntBits = IntTy.getSizeInBits();
17103 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17104 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17105 // These instructions only exist converting from i32 to f32. We can handle
17106 // smaller integers by generating an extra extend, but larger ones would
17107 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17108 // these instructions only support v2i32/v4i32 types.
17109 return SDValue();
17110 }
17111
17112 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17113 APFloat Recip(0.0f);
17114 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17115 return SDValue();
17116
17117 bool IsExact;
17118 APSInt IntVal(33);
17119 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17120 APFloat::opOK ||
17121 !IsExact)
17122 return SDValue();
17123
17124 int32_t C = IntVal.exactLogBase2();
17125 if (C == -1 || C == 0 || C > 32)
17126 return SDValue();
17127
17128 SDLoc DL(N);
17129 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17130 SDValue ConvInput = Op.getOperand(0);
17131 if (IntBits < FloatBits)
17132 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17133 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17134
17135 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17136 : Intrinsic::arm_neon_vcvtfxu2fp;
17137 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17138 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17139 DAG.getConstant(C, DL, MVT::i32));
17140}
17141
17142 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17143 const ARMSubtarget *ST) {
17144 if (!ST->hasMVEIntegerOps())
17145 return SDValue();
17146
17147 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17148 EVT ResVT = N->getValueType(0);
17149 SDValue N0 = N->getOperand(0);
17150 SDLoc dl(N);
17151
17152 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17153 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17154 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17155 N0.getValueType() == MVT::v16i8)) {
17156 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17157 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17158 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17159 }
17160
17161 // We are looking for something that will have illegal types if left alone,
17162 // but that we can convert to a single instruction under MVE. For example
17163 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17164 // or
17165 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17166
17167 // The legal cases are:
17168 // VADDV u/s 8/16/32
17169 // VMLAV u/s 8/16/32
17170 // VADDLV u/s 32
17171 // VMLALV u/s 16/32
17172
17173 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17174 // extend it and use v4i32 instead.
17175 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17176 EVT AVT = A.getValueType();
17177 return any_of(ExtTypes, [&](MVT Ty) {
17178 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17179 AVT.bitsLE(Ty);
17180 });
17181 };
17182 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17183 EVT AVT = A.getValueType();
17184 if (!AVT.is128BitVector())
17185 A = DAG.getNode(ExtendCode, dl,
17186 AVT.changeVectorElementType(MVT::getIntegerVT(
17187 128 / AVT.getVectorMinNumElements())),
17188 A);
17189 return A;
17190 };
17191 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17192 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17193 return SDValue();
17194 SDValue A = N0->getOperand(0);
17195 if (ExtTypeMatches(A, ExtTypes))
17196 return ExtendIfNeeded(A, ExtendCode);
17197 return SDValue();
17198 };
17199 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17200 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17201 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17202 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17203 return SDValue();
17204 Mask = N0->getOperand(0);
17205 SDValue Ext = N0->getOperand(1);
17206 if (Ext->getOpcode() != ExtendCode)
17207 return SDValue();
17208 SDValue A = Ext->getOperand(0);
17209 if (ExtTypeMatches(A, ExtTypes))
17210 return ExtendIfNeeded(A, ExtendCode);
17211 return SDValue();
17212 };
17213 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17214 SDValue &A, SDValue &B) {
17215 // For a vmla we are trying to match a larger pattern:
17216 // ExtA = sext/zext A
17217 // ExtB = sext/zext B
17218 // Mul = mul ExtA, ExtB
17219 // vecreduce.add Mul
17220 // There might also be an extra extend between the mul and the addreduce, so
17221 // long as the bitwidth is high enough to make them equivalent (for example
17222 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17223 if (ResVT != RetTy)
17224 return false;
17225 SDValue Mul = N0;
17226 if (Mul->getOpcode() == ExtendCode &&
17227 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17228 ResVT.getScalarSizeInBits())
17229 Mul = Mul->getOperand(0);
17230 if (Mul->getOpcode() != ISD::MUL)
17231 return false;
17232 SDValue ExtA = Mul->getOperand(0);
17233 SDValue ExtB = Mul->getOperand(1);
17234 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17235 return false;
17236 A = ExtA->getOperand(0);
17237 B = ExtB->getOperand(0);
17238 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17239 A = ExtendIfNeeded(A, ExtendCode);
17240 B = ExtendIfNeeded(B, ExtendCode);
17241 return true;
17242 }
17243 return false;
17244 };
17245 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17246 SDValue &A, SDValue &B, SDValue &Mask) {
17247 // Same as the pattern above with a select for the zero predicated lanes
17248 // ExtA = sext/zext A
17249 // ExtB = sext/zext B
17250 // Mul = mul ExtA, ExtB
17251 // N0 = select Mask, Mul, 0
17252 // vecreduce.add N0
17253 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17254 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17255 return false;
17256 Mask = N0->getOperand(0);
17257 SDValue Mul = N0->getOperand(1);
17258 if (Mul->getOpcode() == ExtendCode &&
17259 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17260 ResVT.getScalarSizeInBits())
17261 Mul = Mul->getOperand(0);
17262 if (Mul->getOpcode() != ISD::MUL)
17263 return false;
17264 SDValue ExtA = Mul->getOperand(0);
17265 SDValue ExtB = Mul->getOperand(1);
17266 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17267 return false;
17268 A = ExtA->getOperand(0);
17269 B = ExtB->getOperand(0);
17270 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17271 A = ExtendIfNeeded(A, ExtendCode);
17272 B = ExtendIfNeeded(B, ExtendCode);
17273 return true;
17274 }
17275 return false;
17276 };
17277 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17278 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17279 // reductions. The operands are extended with MVEEXT, but as they are
17280 // reductions the lane orders do not matter. MVEEXT may be combined with
17281 // loads to produce two extending loads, or else they will be expanded to
17282 // VREV/VMOVL.
17283 EVT VT = Ops[0].getValueType();
17284 if (VT == MVT::v16i8) {
17285 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17286 "Unexpected illegal long reduction opcode");
17287 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17288
17289 SDValue Ext0 =
17290 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17291 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17292 SDValue Ext1 =
17293 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17294 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17295
17296 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17297 Ext0, Ext1);
17298 SDValue MLA1 =
17299 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17300 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17301 Ext0.getValue(1), Ext1.getValue(1));
17302 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17303 }
17304 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17305 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17306 SDValue(Node.getNode(), 1));
17307 };
17308
17309 SDValue A, B;
17310 SDValue Mask;
17311 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17312 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17313 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17314 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17315 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17316 A, B))
17317 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17318 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17319 A, B))
17320 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17321 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17322 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17323 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17324 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17325 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17326 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17327
17328 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17329 Mask))
17330 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17331 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17332 Mask))
17333 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17334 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17335 Mask))
17336 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17337 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17338 Mask))
17339 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17340 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17341 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17342 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17343 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17344 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17345 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17346
17347 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17348 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17349 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17350 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17351 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17352 return Create64bitNode(ARMISD::VADDLVs, {A});
17353 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17354 return Create64bitNode(ARMISD::VADDLVu, {A});
17355 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17356 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17357 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17358 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17359 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17360 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17361
17362 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17363 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17364 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17365 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17366 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17367 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17368 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17369 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17370 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17371 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17372 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17373 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17374 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17375 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17376
17377 // Some complications. We can get a case where the two inputs of the mul are
17378 // the same, then the output sext will have been helpfully converted to a
17379 // zext. Turn it back.
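  // For instance (sketch): vecreduce_add(zext(mul(sext(A), sext(A)))) is
  // rewritten to use a sign extend of the mul again, so that the patterns
  // above can match it as a signed VMLAV/VMLALV on a later combine pass.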
17380 SDValue Op = N0;
17381 if (Op->getOpcode() == ISD::VSELECT)
17382 Op = Op->getOperand(1);
17383 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17384 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17385 SDValue Mul = Op->getOperand(0);
17386 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17387 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17388 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17389 if (Op != N0)
17390 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17391 N0->getOperand(0), Ext, N0->getOperand(2));
17392 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17393 }
17394 }
17395
17396 return SDValue();
17397}
17398
17399// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17400// the lanes are used. Due to the reduction being commutative the shuffle can be
17401// removed.
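// A small example of the idea: vaddv(shuffle <3,2,1,0> x) sums the same four
// lanes as vaddv(x), so the shuffle is dropped; for vmlav both shuffles must
// use the identical mask so the pairing of lanes is preserved.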
17402 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17403 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17404 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17405 if (!Shuf || !Shuf->getOperand(1).isUndef())
17406 return SDValue();
17407
17408 // Check all elements are used once in the mask.
17409 ArrayRef<int> Mask = Shuf->getMask();
17410 APInt SetElts(Mask.size(), 0);
17411 for (int E : Mask) {
17412 if (E < 0 || E >= (int)Mask.size())
17413 return SDValue();
17414 SetElts.setBit(E);
17415 }
17416 if (!SetElts.isAllOnes())
17417 return SDValue();
17418
17419 if (N->getNumOperands() != VecOp + 1) {
17420 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17421 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17422 return SDValue();
17423 }
17424
17425 SmallVector<SDValue> Ops;
17426 for (SDValue Op : N->ops()) {
17427 if (Op.getValueType().isVector())
17428 Ops.push_back(Op.getOperand(0));
17429 else
17430 Ops.push_back(Op);
17431 }
17432 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17433}
17434
17435 static SDValue PerformVMOVNCombine(SDNode *N,
17436 TargetLowering::DAGCombinerInfo &DCI) {
17437 SDValue Op0 = N->getOperand(0);
17438 SDValue Op1 = N->getOperand(1);
17439 unsigned IsTop = N->getConstantOperandVal(2);
17440
17441 // VMOVNT a undef -> a
17442 // VMOVNB a undef -> a
17443 // VMOVNB undef a -> a
17444 if (Op1->isUndef())
17445 return Op0;
17446 if (Op0->isUndef() && !IsTop)
17447 return Op1;
17448
17449 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17450 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17451 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17452 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17453 Op1->getConstantOperandVal(2) == 0)
17454 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17455 Op0, Op1->getOperand(1), N->getOperand(2));
17456
17457 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17458 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17459 // into the top or bottom lanes.
17460 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17461 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17462 APInt Op0DemandedElts =
17463 IsTop ? Op1DemandedElts
17464 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17465
17466 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17467 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17468 return SDValue(N, 0);
17469 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17470 return SDValue(N, 0);
17471
17472 return SDValue();
17473}
17474
17475 static SDValue PerformVQMOVNCombine(SDNode *N,
17476 TargetLowering::DAGCombinerInfo &DCI) {
17477 SDValue Op0 = N->getOperand(0);
17478 unsigned IsTop = N->getConstantOperandVal(2);
17479
17480 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17481 APInt Op0DemandedElts =
17482 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17483 : APInt::getHighBitsSet(2, 1));
17484
17485 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17486 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17487 return SDValue(N, 0);
17488 return SDValue();
17489}
17490
17491 static SDValue PerformVQDMULHCombine(SDNode *N,
17492 TargetLowering::DAGCombinerInfo &DCI) {
17493 EVT VT = N->getValueType(0);
17494 SDValue LHS = N->getOperand(0);
17495 SDValue RHS = N->getOperand(1);
17496
17497 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17498 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17499 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17500 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17501 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17502 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17503 SDLoc DL(N);
17504 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17505 LHS.getOperand(0), RHS.getOperand(0));
17506 SDValue UndefV = LHS.getOperand(1);
17507 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17508 }
17509 return SDValue();
17510}
17511
17512 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17513 SDLoc DL(N);
17514 SDValue Op0 = N->getOperand(0);
17515 SDValue Op1 = N->getOperand(1);
17516
17517 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17518 // uses of the intrinsics.
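  // For example (illustrative): an LSLL of the i64 pair by -8 is rewritten as
  // an LSRL by 8, and a shift amount of 0 simply forwards the two operands.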
17519 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17520 int ShiftAmt = C->getSExtValue();
17521 if (ShiftAmt == 0) {
17522 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17523 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17524 return SDValue();
17525 }
17526
17527 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17528 unsigned NewOpcode =
17529 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17530 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17531 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17532 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17533 return NewShift;
17534 }
17535 }
17536
17537 return SDValue();
17538}
17539
17540/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17541 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17542 DAGCombinerInfo &DCI) const {
17543 SelectionDAG &DAG = DCI.DAG;
17544 unsigned IntNo = N->getConstantOperandVal(0);
17545 switch (IntNo) {
17546 default:
17547 // Don't do anything for most intrinsics.
17548 break;
17549
17550 // Vector shifts: check for immediate versions and lower them.
17551 // Note: This is done during DAG combining instead of DAG legalizing because
17552 // the build_vectors for 64-bit vector element shift counts are generally
17553 // not legal, and it is hard to see their values after they get legalized to
17554 // loads from a constant pool.
17555 case Intrinsic::arm_neon_vshifts:
17556 case Intrinsic::arm_neon_vshiftu:
17557 case Intrinsic::arm_neon_vrshifts:
17558 case Intrinsic::arm_neon_vrshiftu:
17559 case Intrinsic::arm_neon_vrshiftn:
17560 case Intrinsic::arm_neon_vqshifts:
17561 case Intrinsic::arm_neon_vqshiftu:
17562 case Intrinsic::arm_neon_vqshiftsu:
17563 case Intrinsic::arm_neon_vqshiftns:
17564 case Intrinsic::arm_neon_vqshiftnu:
17565 case Intrinsic::arm_neon_vqshiftnsu:
17566 case Intrinsic::arm_neon_vqrshiftns:
17567 case Intrinsic::arm_neon_vqrshiftnu:
17568 case Intrinsic::arm_neon_vqrshiftnsu: {
17569 EVT VT = N->getOperand(1).getValueType();
17570 int64_t Cnt;
17571 unsigned VShiftOpc = 0;
17572
17573 switch (IntNo) {
17574 case Intrinsic::arm_neon_vshifts:
17575 case Intrinsic::arm_neon_vshiftu:
17576 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17577 VShiftOpc = ARMISD::VSHLIMM;
17578 break;
17579 }
17580 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17581 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17582 : ARMISD::VSHRuIMM);
17583 break;
17584 }
17585 return SDValue();
17586
17587 case Intrinsic::arm_neon_vrshifts:
17588 case Intrinsic::arm_neon_vrshiftu:
17589 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17590 break;
17591 return SDValue();
17592
17593 case Intrinsic::arm_neon_vqshifts:
17594 case Intrinsic::arm_neon_vqshiftu:
17595 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17596 break;
17597 return SDValue();
17598
17599 case Intrinsic::arm_neon_vqshiftsu:
17600 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17601 break;
17602 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17603
17604 case Intrinsic::arm_neon_vrshiftn:
17605 case Intrinsic::arm_neon_vqshiftns:
17606 case Intrinsic::arm_neon_vqshiftnu:
17607 case Intrinsic::arm_neon_vqshiftnsu:
17608 case Intrinsic::arm_neon_vqrshiftns:
17609 case Intrinsic::arm_neon_vqrshiftnu:
17610 case Intrinsic::arm_neon_vqrshiftnsu:
17611 // Narrowing shifts require an immediate right shift.
17612 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17613 break;
17614 llvm_unreachable("invalid shift count for narrowing vector shift "
17615 "intrinsic");
17616
17617 default:
17618 llvm_unreachable("unhandled vector shift");
17619 }
17620
17621 switch (IntNo) {
17622 case Intrinsic::arm_neon_vshifts:
17623 case Intrinsic::arm_neon_vshiftu:
17624 // Opcode already set above.
17625 break;
17626 case Intrinsic::arm_neon_vrshifts:
17627 VShiftOpc = ARMISD::VRSHRsIMM;
17628 break;
17629 case Intrinsic::arm_neon_vrshiftu:
17630 VShiftOpc = ARMISD::VRSHRuIMM;
17631 break;
17632 case Intrinsic::arm_neon_vrshiftn:
17633 VShiftOpc = ARMISD::VRSHRNIMM;
17634 break;
17635 case Intrinsic::arm_neon_vqshifts:
17636 VShiftOpc = ARMISD::VQSHLsIMM;
17637 break;
17638 case Intrinsic::arm_neon_vqshiftu:
17639 VShiftOpc = ARMISD::VQSHLuIMM;
17640 break;
17641 case Intrinsic::arm_neon_vqshiftsu:
17642 VShiftOpc = ARMISD::VQSHLsuIMM;
17643 break;
17644 case Intrinsic::arm_neon_vqshiftns:
17645 VShiftOpc = ARMISD::VQSHRNsIMM;
17646 break;
17647 case Intrinsic::arm_neon_vqshiftnu:
17648 VShiftOpc = ARMISD::VQSHRNuIMM;
17649 break;
17650 case Intrinsic::arm_neon_vqshiftnsu:
17651 VShiftOpc = ARMISD::VQSHRNsuIMM;
17652 break;
17653 case Intrinsic::arm_neon_vqrshiftns:
17654 VShiftOpc = ARMISD::VQRSHRNsIMM;
17655 break;
17656 case Intrinsic::arm_neon_vqrshiftnu:
17657 VShiftOpc = ARMISD::VQRSHRNuIMM;
17658 break;
17659 case Intrinsic::arm_neon_vqrshiftnsu:
17660 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17661 break;
17662 }
17663
17664 SDLoc dl(N);
17665 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17666 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17667 }
17668
17669 case Intrinsic::arm_neon_vshiftins: {
17670 EVT VT = N->getOperand(1).getValueType();
17671 int64_t Cnt;
17672 unsigned VShiftOpc = 0;
17673
17674 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17675 VShiftOpc = ARMISD::VSLIIMM;
17676 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17677 VShiftOpc = ARMISD::VSRIIMM;
17678 else {
17679 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17680 }
17681
17682 SDLoc dl(N);
17683 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17684 N->getOperand(1), N->getOperand(2),
17685 DAG.getConstant(Cnt, dl, MVT::i32));
17686 }
17687
17688 case Intrinsic::arm_neon_vqrshifts:
17689 case Intrinsic::arm_neon_vqrshiftu:
17690 // No immediate versions of these to check for.
17691 break;
17692
17693 case Intrinsic::arm_neon_vbsl: {
17694 SDLoc dl(N);
17695 return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17696 N->getOperand(2), N->getOperand(3));
17697 }
17698 case Intrinsic::arm_mve_vqdmlah:
17699 case Intrinsic::arm_mve_vqdmlash:
17700 case Intrinsic::arm_mve_vqrdmlah:
17701 case Intrinsic::arm_mve_vqrdmlash:
17702 case Intrinsic::arm_mve_vmla_n_predicated:
17703 case Intrinsic::arm_mve_vmlas_n_predicated:
17704 case Intrinsic::arm_mve_vqdmlah_predicated:
17705 case Intrinsic::arm_mve_vqdmlash_predicated:
17706 case Intrinsic::arm_mve_vqrdmlah_predicated:
17707 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17708 // These intrinsics all take an i32 scalar operand which is narrowed to the
17709 // size of a single lane of the vector type they return. So we don't need
17710 // any bits of that operand above that point, which allows us to eliminate
17711 // uxth/sxth.
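    // For example (a sketch for the v8i16 forms): only the low 16 bits of the
    // scalar operand are demanded, so a preceding sxth/uxth that merely sets
    // the upper bits can be removed by SimplifyDemandedBits below.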
17712 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17713 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17714 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17715 return SDValue();
17716 break;
17717 }
17718
17719 case Intrinsic::arm_mve_minv:
17720 case Intrinsic::arm_mve_maxv:
17721 case Intrinsic::arm_mve_minav:
17722 case Intrinsic::arm_mve_maxav:
17723 case Intrinsic::arm_mve_minv_predicated:
17724 case Intrinsic::arm_mve_maxv_predicated:
17725 case Intrinsic::arm_mve_minav_predicated:
17726 case Intrinsic::arm_mve_maxav_predicated: {
17727 // These intrinsics all take an i32 scalar operand which is narrowed to the
17728 // size of a single lane of the vector type they take as the other input.
17729 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17730 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17731 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17732 return SDValue();
17733 break;
17734 }
17735
17736 case Intrinsic::arm_mve_addv: {
17737 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17738 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17739 bool Unsigned = N->getConstantOperandVal(2);
17740 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17741 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17742 }
17743
17744 case Intrinsic::arm_mve_addlv:
17745 case Intrinsic::arm_mve_addlv_predicated: {
17746 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17747 // which recombines the two outputs into an i64
17748 bool Unsigned = N->getConstantOperandVal(2);
17749 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17750 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17751 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17752
17753 SmallVector<SDValue, 4> Ops;
17754 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17755 if (i != 2) // skip the unsigned flag
17756 Ops.push_back(N->getOperand(i));
17757
17758 SDLoc dl(N);
17759 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17760 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17761 val.getValue(1));
17762 }
17763 }
17764
17765 return SDValue();
17766}
17767
17768/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17769/// lowers them. As with the vector shift intrinsics, this is done during DAG
17770/// combining instead of DAG legalizing because the build_vectors for 64-bit
17771/// vector element shift counts are generally not legal, and it is hard to see
17772/// their values after they get legalized to loads from a constant pool.
17773 static SDValue PerformShiftCombine(SDNode *N,
17774 TargetLowering::DAGCombinerInfo &DCI,
17775 const ARMSubtarget *ST) {
17776 SelectionDAG &DAG = DCI.DAG;
17777 EVT VT = N->getValueType(0);
17778
17779 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17780 N->getOperand(0)->getOpcode() == ISD::AND &&
17781 N->getOperand(0)->hasOneUse()) {
17782 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17783 return SDValue();
17784 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17785 // usually show up because instcombine prefers to canonicalize it to
17786 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17787 // out of GEP lowering in some cases.
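// As a concrete illustration of the rewrite performed below: with
// AndMask = 0x3f and ShiftAmt = 2, MaskedBits = countl_zero(0x3f) = 26 and
// the node becomes (srl (shl x, 26), 24), which computes the same value as
// ((x & 0x3f) << 2) without materializing the mask in a register.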
17788 SDValue N0 = N->getOperand(0);
17789 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17790 if (!ShiftAmtNode)
17791 return SDValue();
17792 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17793 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17794 if (!AndMaskNode)
17795 return SDValue();
17796 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17797 // Don't transform uxtb/uxth.
17798 if (AndMask == 255 || AndMask == 65535)
17799 return SDValue();
17800 if (isMask_32(AndMask)) {
17801 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17802 if (MaskedBits > ShiftAmt) {
17803 SDLoc DL(N);
17804 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17805 DAG.getConstant(MaskedBits, DL, MVT::i32));
17806 return DAG.getNode(
17807 ISD::SRL, DL, MVT::i32, SHL,
17808 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17809 }
17810 }
17811 }
17812
17813 // Nothing to be done for scalar shifts.
17814 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17815 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17816 return SDValue();
17817 if (ST->hasMVEIntegerOps())
17818 return SDValue();
17819
17820 int64_t Cnt;
17821
17822 switch (N->getOpcode()) {
17823 default: llvm_unreachable("unexpected shift opcode");
17824
17825 case ISD::SHL:
17826 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17827 SDLoc dl(N);
17828 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17829 DAG.getConstant(Cnt, dl, MVT::i32));
17830 }
17831 break;
17832
17833 case ISD::SRA:
17834 case ISD::SRL:
17835 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17836 unsigned VShiftOpc =
17837 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17838 SDLoc dl(N);
17839 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17840 DAG.getConstant(Cnt, dl, MVT::i32));
17841 }
17842 }
17843 return SDValue();
17844}
17845
17846 // Look for a sign, zero or fp extend of a larger-than-legal load. This can be
17847// split into multiple extending loads, which are simpler to deal with than an
17848// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17849// to convert the type to an f32.
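// For example, under MVE a (v8i32 (sext (v8i8 load))) is split below into two
// v4i8 -> v4i32 sextloads, one from the base pointer and one 4 bytes further
// on, whose results are then joined with a concat_vectors.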
17850 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17851 SDValue N0 = N->getOperand(0);
17852 if (N0.getOpcode() != ISD::LOAD)
17853 return SDValue();
17854 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17855 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17856 LD->getExtensionType() != ISD::NON_EXTLOAD)
17857 return SDValue();
17858 EVT FromVT = LD->getValueType(0);
17859 EVT ToVT = N->getValueType(0);
17860 if (!ToVT.isVector())
17861 return SDValue();
17862 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17863 EVT ToEltVT = ToVT.getVectorElementType();
17864 EVT FromEltVT = FromVT.getVectorElementType();
17865
17866 unsigned NumElements = 0;
17867 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17868 NumElements = 4;
17869 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17870 NumElements = 4;
17871 if (NumElements == 0 ||
17872 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17873 FromVT.getVectorNumElements() % NumElements != 0 ||
17874 !isPowerOf2_32(NumElements))
17875 return SDValue();
17876
17877 LLVMContext &C = *DAG.getContext();
17878 SDLoc DL(LD);
17879 // Details about the old load
17880 SDValue Ch = LD->getChain();
17881 SDValue BasePtr = LD->getBasePtr();
17882 Align Alignment = LD->getOriginalAlign();
17883 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17884 AAMDNodes AAInfo = LD->getAAInfo();
17885
17886 ISD::LoadExtType NewExtType =
17887 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17888 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17889 EVT NewFromVT = EVT::getVectorVT(
17890 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17891 EVT NewToVT = EVT::getVectorVT(
17892 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17893
17894 SmallVector<SDValue, 4> Loads;
17895 SmallVector<SDValue, 4> Chains;
17896 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17897 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17898 SDValue NewPtr =
17899 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17900
17901 SDValue NewLoad =
17902 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17903 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17904 Alignment, MMOFlags, AAInfo);
17905 Loads.push_back(NewLoad);
17906 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17907 }
17908
17909 // Float truncs need to be extended with VCVTB's into their floating point types.
17910 if (FromEltVT == MVT::f16) {
17911 SmallVector<SDValue, 4> Extends;
17912
17913 for (unsigned i = 0; i < Loads.size(); i++) {
17914 SDValue LoadBC =
17915 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17916 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17917 DAG.getConstant(0, DL, MVT::i32));
17918 Extends.push_back(FPExt);
17919 }
17920
17921 Loads = Extends;
17922 }
17923
17924 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17925 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17926 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17927}
17928
17929/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17930/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17931 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17932 const ARMSubtarget *ST) {
17933 SDValue N0 = N->getOperand(0);
17934
17935 // Check for sign- and zero-extensions of vector extract operations of 8- and
17936 // 16-bit vector elements. NEON and MVE support these directly. They are
17937 // handled during DAG combining because type legalization will promote them
17938 // to 32-bit types and it is messy to recognize the operations after that.
17939 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17940 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17941 SDValue Vec = N0.getOperand(0);
17942 SDValue Lane = N0.getOperand(1);
17943 EVT VT = N->getValueType(0);
17944 EVT EltVT = N0.getValueType();
17945 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17946
17947 if (VT == MVT::i32 &&
17948 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17949 TLI.isTypeLegal(Vec.getValueType()) &&
17950 isa<ConstantSDNode>(Lane)) {
17951
17952 unsigned Opc = 0;
17953 switch (N->getOpcode()) {
17954 default: llvm_unreachable("unexpected opcode");
17955 case ISD::SIGN_EXTEND:
17956 Opc = ARMISD::VGETLANEs;
17957 break;
17958 case ISD::ZERO_EXTEND:
17959 case ISD::ANY_EXTEND:
17960 Opc = ARMISD::VGETLANEu;
17961 break;
17962 }
17963 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17964 }
17965 }
17966
17967 if (ST->hasMVEIntegerOps())
17968 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17969 return NewLoad;
17970
17971 return SDValue();
17972}
17973
17974 static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17975 const ARMSubtarget *ST) {
17976 if (ST->hasMVEFloatOps())
17977 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17978 return NewLoad;
17979
17980 return SDValue();
17981}
17982
17983// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17984// constant bounds.
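// For example, smin(smax(x, -128), 127) becomes (ARMISD::SSAT x, 7),
// saturating x to the signed 8-bit range, and smin(smax(x, 0), 255) becomes
// (ARMISD::USAT x, 8), saturating x to the unsigned 8-bit range.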
17985 static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17986 const ARMSubtarget *Subtarget) {
17987 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17988 !Subtarget->isThumb2())
17989 return SDValue();
17990
17991 EVT VT = Op.getValueType();
17992 SDValue Op0 = Op.getOperand(0);
17993
17994 if (VT != MVT::i32 ||
17995 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17996 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17997 !isa<ConstantSDNode>(Op0.getOperand(1)))
17998 return SDValue();
17999
18000 SDValue Min = Op;
18001 SDValue Max = Op0;
18002 SDValue Input = Op0.getOperand(0);
18003 if (Min.getOpcode() == ISD::SMAX)
18004 std::swap(Min, Max);
18005
18006 APInt MinC = Min.getConstantOperandAPInt(1);
18007 APInt MaxC = Max.getConstantOperandAPInt(1);
18008
18009 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
18010 !(MinC + 1).isPowerOf2())
18011 return SDValue();
18012
18013 SDLoc DL(Op);
18014 if (MinC == ~MaxC)
18015 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
18016 DAG.getConstant(MinC.countr_one(), DL, VT));
18017 if (MaxC == 0)
18018 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
18019 DAG.getConstant(MinC.countr_one(), DL, VT));
18020
18021 return SDValue();
18022}
18023
18024/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
18025/// saturates.
18026 static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
18027 const ARMSubtarget *ST) {
18028 EVT VT = N->getValueType(0);
18029 SDValue N0 = N->getOperand(0);
18030
18031 if (VT == MVT::i32)
18032 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
18033
18034 if (!ST->hasMVEIntegerOps())
18035 return SDValue();
18036
18037 if (SDValue V = PerformVQDMULHCombine(N, DAG))
18038 return V;
18039
18040 if (VT != MVT::v4i32 && VT != MVT::v8i16)
18041 return SDValue();
18042
18043 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
18044 // Check one is a smin and the other is a smax
18045 if (Min->getOpcode() != ISD::SMIN)
18046 std::swap(Min, Max);
18047 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18048 return false;
18049
18050 APInt SaturateC;
18051 if (VT == MVT::v4i32)
18052 SaturateC = APInt(32, (1 << 15) - 1, true);
18053 else //if (VT == MVT::v8i16)
18054 SaturateC = APInt(16, (1 << 7) - 1, true);
18055
18056 APInt MinC, MaxC;
18057 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18058 MinC != SaturateC)
18059 return false;
18060 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18061 MaxC != ~SaturateC)
18062 return false;
18063 return true;
18064 };
18065
18066 if (IsSignedSaturate(N, N0.getNode())) {
18067 SDLoc DL(N);
18068 MVT ExtVT, HalfVT;
18069 if (VT == MVT::v4i32) {
18070 HalfVT = MVT::v8i16;
18071 ExtVT = MVT::v4i16;
18072 } else { // if (VT == MVT::v8i16)
18073 HalfVT = MVT::v16i8;
18074 ExtVT = MVT::v8i8;
18075 }
18076
18077 // Create a VQMOVNB with undef top lanes, then sign extend it into the top
18078 // half. That extend will hopefully be removed if only the bottom bits are
18079 // demanded (through a truncating store, for example).
18080 SDValue VQMOVN =
18081 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18082 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18083 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18084 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18085 DAG.getValueType(ExtVT));
18086 }
18087
18088 auto IsUnsignedSaturate = [&](SDNode *Min) {
18089 // For unsigned, we just need to check for <= 0xffff
18090 if (Min->getOpcode() != ISD::UMIN)
18091 return false;
18092
18093 APInt SaturateC;
18094 if (VT == MVT::v4i32)
18095 SaturateC = APInt(32, (1 << 16) - 1, true);
18096 else //if (VT == MVT::v8i16)
18097 SaturateC = APInt(16, (1 << 8) - 1, true);
18098
18099 APInt MinC;
18100 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18101 MinC != SaturateC)
18102 return false;
18103 return true;
18104 };
18105
18106 if (IsUnsignedSaturate(N)) {
18107 SDLoc DL(N);
18108 MVT HalfVT;
18109 unsigned ExtConst;
18110 if (VT == MVT::v4i32) {
18111 HalfVT = MVT::v8i16;
18112 ExtConst = 0x0000FFFF;
18113 } else { //if (VT == MVT::v8i16)
18114 HalfVT = MVT::v16i8;
18115 ExtConst = 0x00FF;
18116 }
18117
18118 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18119 // an AND. That extend will hopefully be removed if only the bottom bits are
18120 // demanded (through a truncating store, for example).
18121 SDValue VQMOVN =
18122 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18123 DAG.getConstant(0, DL, MVT::i32));
18124 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18125 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18126 DAG.getConstant(ExtConst, DL, VT));
18127 }
18128
18129 return SDValue();
18130}
18131
18132 static const APInt *isPowerOf2Constant(SDValue V) {
18133 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18134 if (!C)
18135 return nullptr;
18136 const APInt *CV = &C->getAPIntValue();
18137 return CV->isPowerOf2() ? CV : nullptr;
18138}
18139
18140 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18141 // If we have a CMOV, OR and AND combination such as:
18142 // if (x & CN)
18143 // y |= CM;
18144 //
18145 // And:
18146 // * CN is a single bit;
18147 // * All bits covered by CM are known zero in y
18148 //
18149 // Then we can convert this into a sequence of BFI instructions. This will
18150 // always be a win if CM is a single bit, will always be no worse than the
18151 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18152 // three bits (due to the extra IT instruction).
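// For example, with CN = 0x4 and CM = 0x30 (and bits 4-5 known zero in y),
// the code below shifts x right by 2 and emits two BFIs that insert bit 0 of
// the shifted value into bits 4 and 5 of y, avoiding the tst/orr/conditional
// move sequence.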
18153
18154 SDValue Op0 = CMOV->getOperand(0);
18155 SDValue Op1 = CMOV->getOperand(1);
18156 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18157 SDValue CmpZ = CMOV->getOperand(3);
18158
18159 // The compare must be against zero.
18160 if (!isNullConstant(CmpZ->getOperand(1)))
18161 return SDValue();
18162
18163 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18164 SDValue And = CmpZ->getOperand(0);
18165 if (And->getOpcode() != ISD::AND)
18166 return SDValue();
18167 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18168 if (!AndC)
18169 return SDValue();
18170 SDValue X = And->getOperand(0);
18171
18172 if (CC == ARMCC::EQ) {
18173 // We're performing an "equal to zero" compare. Swap the operands so we
18174 // canonicalize on a "not equal to zero" compare.
18175 std::swap(Op0, Op1);
18176 } else {
18177 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18178 }
18179
18180 if (Op1->getOpcode() != ISD::OR)
18181 return SDValue();
18182
18183 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18184 if (!OrC)
18185 return SDValue();
18186 SDValue Y = Op1->getOperand(0);
18187
18188 if (Op0 != Y)
18189 return SDValue();
18190
18191 // Now, is it profitable to continue?
18192 APInt OrCI = OrC->getAPIntValue();
18193 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18194 if (OrCI.popcount() > Heuristic)
18195 return SDValue();
18196
18197 // Lastly, can we determine that the bits defined by OrCI
18198 // are zero in Y?
18199 KnownBits Known = DAG.computeKnownBits(Y);
18200 if ((OrCI & Known.Zero) != OrCI)
18201 return SDValue();
18202
18203 // OK, we can do the combine.
18204 SDValue V = Y;
18205 SDLoc dl(X);
18206 EVT VT = X.getValueType();
18207 unsigned BitInX = AndC->logBase2();
18208
18209 if (BitInX != 0) {
18210 // We must shift X first.
18211 X = DAG.getNode(ISD::SRL, dl, VT, X,
18212 DAG.getConstant(BitInX, dl, VT));
18213 }
18214
18215 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18216 BitInY < NumActiveBits; ++BitInY) {
18217 if (OrCI[BitInY] == 0)
18218 continue;
18219 APInt Mask(VT.getSizeInBits(), 0);
18220 Mask.setBit(BitInY);
18221 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18222 // Confusingly, the operand is an *inverted* mask.
18223 DAG.getConstant(~Mask, dl, VT));
18224 }
18225
18226 return V;
18227}
18228
18229// Given N, the value controlling the conditional branch, search for the loop
18230// intrinsic, returning it, along with how the value is used. We need to handle
18231// patterns such as the following:
18232// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18233// (brcond (setcc (loop.decrement), 0, eq), exit)
18234// (brcond (setcc (loop.decrement), 0, ne), header)
18235 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18236 bool &Negate) {
18237 switch (N->getOpcode()) {
18238 default:
18239 break;
18240 case ISD::XOR: {
18241 if (!isa<ConstantSDNode>(N.getOperand(1)))
18242 return SDValue();
18243 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18244 return SDValue();
18245 Negate = !Negate;
18246 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18247 }
18248 case ISD::SETCC: {
18249 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18250 if (!Const)
18251 return SDValue();
18252 if (Const->isZero())
18253 Imm = 0;
18254 else if (Const->isOne())
18255 Imm = 1;
18256 else
18257 return SDValue();
18258 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18259 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18260 }
18261 case ISD::INTRINSIC_W_CHAIN: {
18262 unsigned IntOp = N.getConstantOperandVal(1);
18263 if (IntOp != Intrinsic::test_start_loop_iterations &&
18264 IntOp != Intrinsic::loop_decrement_reg)
18265 return SDValue();
18266 return N;
18267 }
18268 }
18269 return SDValue();
18270}
18271
18272 static SDValue PerformHWLoopCombine(SDNode *N,
18273 TargetLowering::DAGCombinerInfo &DCI,
18274 const ARMSubtarget *ST) {
18275
18276 // The hwloop intrinsics that we're interested in are used for control-flow,
18277 // either for entering or exiting the loop:
18278 // - test.start.loop.iterations will test whether its operand is zero. If it
18279 // is zero, the proceeding branch should not enter the loop.
18280 // - loop.decrement.reg also tests whether its operand is zero. If it is
18281 // zero, the proceeding branch should not branch back to the beginning of
18282 // the loop.
18283 // So here, we need to check how the brcond is using the result of each
18284 // of the intrinsics to ensure that we're branching to the right place at the
18285 // right time.
18286
18287 ISD::CondCode CC;
18288 SDValue Cond;
18289 int Imm = 1;
18290 bool Negate = false;
18291 SDValue Chain = N->getOperand(0);
18292 SDValue Dest;
18293
18294 if (N->getOpcode() == ISD::BRCOND) {
18295 CC = ISD::SETEQ;
18296 Cond = N->getOperand(1);
18297 Dest = N->getOperand(2);
18298 } else {
18299 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18300 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18301 Cond = N->getOperand(2);
18302 Dest = N->getOperand(4);
18303 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18304 if (!Const->isOne() && !Const->isZero())
18305 return SDValue();
18306 Imm = Const->getZExtValue();
18307 } else
18308 return SDValue();
18309 }
18310
18311 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18312 if (!Int)
18313 return SDValue();
18314
18315 if (Negate)
18316 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18317
18318 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18319 return (CC == ISD::SETEQ && Imm == 0) ||
18320 (CC == ISD::SETNE && Imm == 1) ||
18321 (CC == ISD::SETLT && Imm == 1) ||
18322 (CC == ISD::SETULT && Imm == 1);
18323 };
18324
18325 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18326 return (CC == ISD::SETEQ && Imm == 1) ||
18327 (CC == ISD::SETNE && Imm == 0) ||
18328 (CC == ISD::SETGT && Imm == 0) ||
18329 (CC == ISD::SETUGT && Imm == 0) ||
18330 (CC == ISD::SETGE && Imm == 1) ||
18331 (CC == ISD::SETUGE && Imm == 1);
18332 };
18333
18334 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18335 "unsupported condition");
18336
18337 SDLoc dl(Int);
18338 SelectionDAG &DAG = DCI.DAG;
18339 SDValue Elements = Int.getOperand(2);
18340 unsigned IntOp = Int->getConstantOperandVal(1);
18341 assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
18342 "expected single br user");
18343 SDNode *Br = *N->user_begin();
18344 SDValue OtherTarget = Br->getOperand(1);
18345
18346 // Update the unconditional branch to branch to the given Dest.
18347 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18348 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18349 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18350 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18351 };
18352
18353 if (IntOp == Intrinsic::test_start_loop_iterations) {
18354 SDValue Res;
18355 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18356 // We expect this 'instruction' to branch when the counter is zero.
18357 if (IsTrueIfZero(CC, Imm)) {
18358 SDValue Ops[] = {Chain, Setup, Dest};
18359 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18360 } else {
18361 // The logic is the reverse of what we need for WLS, so find the other
18362 // basic block target: the target of the proceeding br.
18363 UpdateUncondBr(Br, Dest, DAG);
18364
18365 SDValue Ops[] = {Chain, Setup, OtherTarget};
18366 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18367 }
18368 // Update LR count to the new value
18369 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18370 // Update chain
18371 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18372 return Res;
18373 } else {
18374 SDValue Size =
18375 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18376 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18377 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18378 DAG.getVTList(MVT::i32, MVT::Other), Args);
18379 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18380
18381 // We expect this instruction to branch when the count is not zero.
18382 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18383
18384 // Update the unconditional branch to target the loop preheader if we've
18385 // found the condition has been reversed.
18386 if (Target == OtherTarget)
18387 UpdateUncondBr(Br, Dest, DAG);
18388
18389 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18390 SDValue(LoopDec.getNode(), 1), Chain);
18391
18392 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18393 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18394 }
18395 return SDValue();
18396}
18397
18398/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18399SDValue
18400 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18401 SDValue Cmp = N->getOperand(3);
18402 if (Cmp.getOpcode() != ARMISD::CMPZ)
18403 // Only looking at NE cases.
18404 return SDValue();
18405
18406 SDLoc dl(N);
18407 SDValue LHS = Cmp.getOperand(0);
18408 SDValue RHS = Cmp.getOperand(1);
18409 SDValue Chain = N->getOperand(0);
18410 SDValue BB = N->getOperand(1);
18411 SDValue ARMcc = N->getOperand(2);
18412 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18413
18414 // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
18415 // -> (brcond Chain BB CC Flags)
18416 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18417 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18418 LHS->getOperand(0)->hasOneUse() &&
18419 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18420 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18421 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18422 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18423 LHS->getOperand(0)->getOperand(2),
18424 LHS->getOperand(0)->getOperand(3));
18425 }
18426
18427 return SDValue();
18428}
18429
18430/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18431SDValue
18432 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18433 SDValue Cmp = N->getOperand(3);
18434 if (Cmp.getOpcode() != ARMISD::CMPZ)
18435 // Only looking at EQ and NE cases.
18436 return SDValue();
18437
18438 EVT VT = N->getValueType(0);
18439 SDLoc dl(N);
18440 SDValue LHS = Cmp.getOperand(0);
18441 SDValue RHS = Cmp.getOperand(1);
18442 SDValue FalseVal = N->getOperand(0);
18443 SDValue TrueVal = N->getOperand(1);
18444 SDValue ARMcc = N->getOperand(2);
18445 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18446
18447 // BFI is only available on V6T2+.
18448 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18449 SDValue R = PerformCMOVToBFICombine(N, DAG);
18450 if (R)
18451 return R;
18452 }
18453
18454 // Simplify
18455 // mov r1, r0
18456 // cmp r1, x
18457 // mov r0, y
18458 // moveq r0, x
18459 // to
18460 // cmp r0, x
18461 // movne r0, y
18462 //
18463 // mov r1, r0
18464 // cmp r1, x
18465 // mov r0, x
18466 // movne r0, y
18467 // to
18468 // cmp r0, x
18469 // movne r0, y
18470 /// FIXME: Turn this into a target neutral optimization?
18471 SDValue Res;
18472 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18473 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18474 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18475 SDValue ARMcc;
18476 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18477 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18478 }
18479
18480 // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
18481 // -> (cmov F T CC Flags)
18482 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18483 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18484 isNullConstant(RHS)) {
18485 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18486 LHS->getOperand(2), LHS->getOperand(3));
18487 }
18488
18489 if (!VT.isInteger())
18490 return SDValue();
18491
18492 // Fold away an unnecessary CMPZ/CMOV
18493 // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18494 // if C1==EQ -> CMOV A, B, C2, D
18495 // if C1==NE -> CMOV A, B, NOT(C2), D
18496 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18497 N->getConstantOperandVal(2) == ARMCC::NE) {
18498 ARMCC::CondCodes Cond;
18499 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
18500 if (N->getConstantOperandVal(2) == ARMCC::NE)
18501 Cond = ARMCC::getOppositeCondition(Cond);
18502 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18503 N->getOperand(1),
18504 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
18505 }
18506 }
18507
18508 // Materialize a boolean comparison for integers so we can avoid branching.
18509 if (isNullConstant(FalseVal)) {
18510 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18511 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18512 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18513 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18514 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
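// For example, if x == y then x - y == 0, CTLZ returns 32 (0b100000) and the
// SRL by 5 yields 1; for any x != y the difference is non-zero, CTLZ returns
// at most 31, and the SRL by 5 yields 0.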
18515 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18516 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18517 DAG.getConstant(5, dl, MVT::i32));
18518 } else {
18519 // CMOV 0, 1, ==, (CMPZ x, y) ->
18520 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18521 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18522 //
18523 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18524 // x != y. In other words, a carry C == 1 when x == y, C == 0
18525 // otherwise.
18526 // The final UADDO_CARRY computes
18527 // x - y + (0 - (x - y)) + C == C
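// Worked example: with x = 5, y = 3 we get Sub = 2, the borrow is set, so
// C = 0 and the result is 2 + (0 - 2) + 0 == 0; with x == y we get Sub = 0,
// no borrow, so C = 1 and the result is 0 + 0 + 1 == 1.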
18528 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18529 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18530 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18531 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18532 // actually.
18533 SDValue Carry =
18534 DAG.getNode(ISD::SUB, dl, MVT::i32,
18535 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18536 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18537 }
18538 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18539 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18540 // This seems pointless but will allow us to combine it further below.
18541 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18542 SDValue Sub =
18543 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18544 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18545 Sub.getValue(1));
18546 FalseVal = Sub;
18547 }
18548 } else if (isNullConstant(TrueVal)) {
18549 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18550 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18551 // This seems pointless but will allow us to combine it further below.
18552 // Note that we change == for != as this is the dual for the case above.
18553 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18554 SDValue Sub =
18555 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18556 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18557 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18558 Sub.getValue(1));
18559 FalseVal = Sub;
18560 }
18561 }
18562
18563 // On Thumb1, the DAG above may be further combined if z is a power of 2
18564 // (z == 2 ^ K).
18565 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18566 // t1 = (USUBO (SUB x, y), 1)
18567 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18568 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18569 //
18570 // This also handles the special case of comparing against zero; it's
18571 // essentially, the same pattern, except there's no SUBC:
18572 // CMOV x, z, !=, (CMPZ x, 0) ->
18573 // t1 = (USUBO x, 1)
18574 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18575 // Result = if K != 0 then (SHL t2:0, K) else t2:0
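// For example, with z = 4 (K = 2) and x != y, the USUBO of the non-zero
// (SUB x, y) and 1 produces no borrow, the USUBO_CARRY yields 1, and the SHL
// by 2 gives 4; with x == y the borrow is set, the USUBO_CARRY yields 0, and
// the SHL gives 0.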
18576 const APInt *TrueConst;
18577 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18578 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18579 FalseVal.getOperand(1) == RHS) ||
18580 (FalseVal == LHS && isNullConstant(RHS))) &&
18581 (TrueConst = isPowerOf2Constant(TrueVal))) {
18582 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18583 unsigned ShiftAmount = TrueConst->logBase2();
18584 if (ShiftAmount)
18585 TrueVal = DAG.getConstant(1, dl, VT);
18586 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18587 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18588 Subc.getValue(1));
18589
18590 if (ShiftAmount)
18591 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18592 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18593 }
18594
18595 if (Res.getNode()) {
18596 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18597 // Capture demanded bits information that would be otherwise lost.
18598 if (Known.Zero == 0xfffffffe)
18599 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18600 DAG.getValueType(MVT::i1));
18601 else if (Known.Zero == 0xffffff00)
18602 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18603 DAG.getValueType(MVT::i8));
18604 else if (Known.Zero == 0xffff0000)
18605 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18606 DAG.getValueType(MVT::i16));
18607 }
18608
18609 return Res;
18610}
18611
18612 static SDValue PerformBITCASTCombine(SDNode *N,
18613 TargetLowering::DAGCombinerInfo &DCI,
18614 const ARMSubtarget *ST) {
18615 SelectionDAG &DAG = DCI.DAG;
18616 SDValue Src = N->getOperand(0);
18617 EVT DstVT = N->getValueType(0);
18618
18619 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18620 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18621 EVT SrcVT = Src.getValueType();
18622 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18623 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18624 }
18625
18626 // We may have a bitcast of something that has already had this bitcast
18627 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18628 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18629 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18630 Src.getValueType().getScalarSizeInBits())
18631 Src = Src.getOperand(0);
18632
18633 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18634 // would be generated is at least the width of the element type.
18635 EVT SrcVT = Src.getValueType();
18636 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18637 Src.getOpcode() == ARMISD::VMVNIMM ||
18638 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18639 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18640 DAG.getDataLayout().isBigEndian())
18641 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18642
18643 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18644 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18645 return R;
18646
18647 return SDValue();
18648}
18649
18650// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18651// node into stack operations after legalizeOps.
18652 static SDValue PerformMVETruncCombine(SDNode *N,
18653 TargetLowering::DAGCombinerInfo &DCI) {
18654 SelectionDAG &DAG = DCI.DAG;
18655 EVT VT = N->getValueType(0);
18656 SDLoc DL(N);
18657
18658 // MVETrunc(Undef, Undef) -> Undef
18659 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18660 return DAG.getUNDEF(VT);
18661
18662 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18663 if (N->getNumOperands() == 2 &&
18664 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18665 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18666 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18667 N->getOperand(0).getOperand(1),
18668 N->getOperand(1).getOperand(0),
18669 N->getOperand(1).getOperand(1));
18670
18671 // MVETrunc(shuffle, shuffle) -> VMOVN
18672 if (N->getNumOperands() == 2 &&
18673 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18674 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18675 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18676 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18677
18678 if (S0->getOperand(0) == S1->getOperand(0) &&
18679 S0->getOperand(1) == S1->getOperand(1)) {
18680 // Construct complete shuffle mask
18681 SmallVector<int, 8> Mask(S0->getMask());
18682 Mask.append(S1->getMask().begin(), S1->getMask().end());
18683
18684 if (isVMOVNTruncMask(Mask, VT, false))
18685 return DAG.getNode(
18686 ARMISD::VMOVN, DL, VT,
18687 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18688 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18689 DAG.getConstant(1, DL, MVT::i32));
18690 if (isVMOVNTruncMask(Mask, VT, true))
18691 return DAG.getNode(
18692 ARMISD::VMOVN, DL, VT,
18693 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18694 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18695 DAG.getConstant(1, DL, MVT::i32));
18696 }
18697 }
18698
18699 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18700 // truncate to a buildvector to allow the generic optimisations to kick in.
18701 if (all_of(N->ops(), [](SDValue Op) {
18702 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18703 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18704 (Op.getOpcode() == ISD::BITCAST &&
18705 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18706 })) {
18707 SmallVector<SDValue, 8> Extracts;
18708 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18709 SDValue O = N->getOperand(Op);
18710 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18711 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18712 DAG.getConstant(i, DL, MVT::i32));
18713 Extracts.push_back(Ext);
18714 }
18715 }
18716 return DAG.getBuildVector(VT, DL, Extracts);
18717 }
18718
18719 // If we are late in the legalization process and nothing has optimised
18720 // the trunc to anything better, lower it to a stack store and reload,
18721 // performing the truncation whilst keeping the lanes in the correct order:
18722 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18723 if (!DCI.isAfterLegalizeDAG())
18724 return SDValue();
18725
18726 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18727 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18728 int NumIns = N->getNumOperands();
18729 assert((NumIns == 2 || NumIns == 4) &&
18730 "Expected 2 or 4 inputs to an MVETrunc");
18731 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18732 if (N->getNumOperands() == 4)
18733 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18734
18735 SmallVector<SDValue> Chains;
18736 for (int I = 0; I < NumIns; I++) {
18737 SDValue Ptr = DAG.getNode(
18738 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18739 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18740 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18741 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18742 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18743 Ptr, MPI, StoreVT, Align(4));
18744 Chains.push_back(Ch);
18745 }
18746
18747 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18748 MachinePointerInfo MPI =
18749 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18750 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18751}
18752
18753// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18754 static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18755 SelectionDAG &DAG) {
18756 SDValue N0 = N->getOperand(0);
18757 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18758 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18759 return SDValue();
18760
18761 EVT FromVT = LD->getMemoryVT();
18762 EVT ToVT = N->getValueType(0);
18763 if (!ToVT.isVector())
18764 return SDValue();
18765 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18766 EVT ToEltVT = ToVT.getVectorElementType();
18767 EVT FromEltVT = FromVT.getVectorElementType();
18768
18769 unsigned NumElements = 0;
18770 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18771 NumElements = 4;
18772 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18773 NumElements = 8;
18774 assert(NumElements != 0);
18775
18776 ISD::LoadExtType NewExtType =
18777 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18778 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18779 LD->getExtensionType() != ISD::EXTLOAD &&
18780 LD->getExtensionType() != NewExtType)
18781 return SDValue();
18782
18783 LLVMContext &C = *DAG.getContext();
18784 SDLoc DL(LD);
18785 // Details about the old load
18786 SDValue Ch = LD->getChain();
18787 SDValue BasePtr = LD->getBasePtr();
18788 Align Alignment = LD->getOriginalAlign();
18789 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18790 AAMDNodes AAInfo = LD->getAAInfo();
18791
18792 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18793 EVT NewFromVT = EVT::getVectorVT(
18794 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18795 EVT NewToVT = EVT::getVectorVT(
18796 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18797
18798 SmallVector<SDValue, 4> Loads;
18799 SmallVector<SDValue, 4> Chains;
18800 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18801 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18802 SDValue NewPtr =
18803 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18804
18805 SDValue NewLoad =
18806 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18807 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18808 Alignment, MMOFlags, AAInfo);
18809 Loads.push_back(NewLoad);
18810 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18811 }
18812
18813 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18814 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18815 return DAG.getMergeValues(Loads, DL);
18816}
18817
18818 // Perform combines for MVEEXT. If it has not been optimized to anything better
18819// before lowering, it gets converted to stack store and extloads performing the
18820// extend whilst still keeping the same lane ordering.
18821 static SDValue PerformMVEExtCombine(SDNode *N,
18822 TargetLowering::DAGCombinerInfo &DCI) {
18823 SelectionDAG &DAG = DCI.DAG;
18824 EVT VT = N->getValueType(0);
18825 SDLoc DL(N);
18826 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18827 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18828
18829 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18830 *DAG.getContext());
18831 auto Extend = [&](SDValue V) {
18832 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18833 return N->getOpcode() == ARMISD::MVESEXT
18834 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18835 DAG.getValueType(ExtVT))
18836 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18837 };
18838
18839 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18840 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18841 SDValue Ext = Extend(N->getOperand(0));
18842 return DAG.getMergeValues({Ext, Ext}, DL);
18843 }
18844
18845 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18846 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18847 ArrayRef<int> Mask = SVN->getMask();
18848 assert(Mask.size() == 2 * VT.getVectorNumElements());
18849 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18850 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18851 SDValue Op0 = SVN->getOperand(0);
18852 SDValue Op1 = SVN->getOperand(1);
18853
18854 auto CheckInregMask = [&](int Start, int Offset) {
18855 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18856 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18857 return false;
18858 return true;
18859 };
18860 SDValue V0 = SDValue(N, 0);
18861 SDValue V1 = SDValue(N, 1);
18862 if (CheckInregMask(0, 0))
18863 V0 = Extend(Op0);
18864 else if (CheckInregMask(0, 1))
18865 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18866 else if (CheckInregMask(0, Mask.size()))
18867 V0 = Extend(Op1);
18868 else if (CheckInregMask(0, Mask.size() + 1))
18869 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18870
18871 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18872 V1 = Extend(Op1);
18873 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18874 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18875 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18876 V1 = Extend(Op0);
18877 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18878 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18879
18880 if (V0.getNode() != N || V1.getNode() != N)
18881 return DAG.getMergeValues({V0, V1}, DL);
18882 }
18883
18884 // MVEEXT(load) -> extload, extload
18885 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18886 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18887 return L;
18888
18889 if (!DCI.isAfterLegalizeDAG())
18890 return SDValue();
18891
18892 // Lower to a stack store and reload:
18893 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18894 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18895 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18896 int NumOuts = N->getNumValues();
18897 assert((NumOuts == 2 || NumOuts == 4) &&
18898 "Expected 2 or 4 outputs to an MVEEXT");
18899 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18900 *DAG.getContext());
18901 if (N->getNumOperands() == 4)
18902 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18903
18904 MachinePointerInfo MPI =
18905 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18906 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18907 StackPtr, MPI, Align(4));
18908
18909 SmallVector<SDValue> Loads;
18910 for (int I = 0; I < NumOuts; I++) {
18911 SDValue Ptr = DAG.getNode(
18912 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18913 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18914 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18915 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18916 SDValue Load = DAG.getExtLoad(
18917 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18918 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18919 Loads.push_back(Load);
18920 }
18921
18922 return DAG.getMergeValues(Loads, DL);
18923}
18924
18925 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18926 DAGCombinerInfo &DCI) const {
18927 switch (N->getOpcode()) {
18928 default: break;
18929 case ISD::SELECT_CC:
18930 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18931 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18932 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18933 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18934 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18935 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18936 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18937 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18938 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18939 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18940 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18941 case ISD::BRCOND:
18942 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18943 case ARMISD::ADDC:
18944 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18945 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18946 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18947 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18948 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18949 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18950 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18951 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18952 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18953 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18954 case ISD::EXTRACT_VECTOR_ELT:
18955 return PerformExtractEltCombine(N, DCI, Subtarget);
18956 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18957 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18958 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18959 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18960 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18961 case ISD::FP_TO_SINT:
18962 case ISD::FP_TO_UINT:
18963 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18964 case ISD::FADD:
18965 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18966 case ISD::FMUL:
18967 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18968 case ISD::INTRINSIC_WO_CHAIN:
18969 return PerformIntrinsicCombine(N, DCI);
18970 case ISD::SHL:
18971 case ISD::SRA:
18972 case ISD::SRL:
18973 return PerformShiftCombine(N, DCI, Subtarget);
18974 case ISD::SIGN_EXTEND:
18975 case ISD::ZERO_EXTEND:
18976 case ISD::ANY_EXTEND:
18977 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18978 case ISD::FP_EXTEND:
18979 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18980 case ISD::SMIN:
18981 case ISD::UMIN:
18982 case ISD::SMAX:
18983 case ISD::UMAX:
18984 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18985 case ARMISD::CMOV:
18986 return PerformCMOVCombine(N, DCI.DAG);
18987 case ARMISD::BRCOND:
18988 return PerformBRCONDCombine(N, DCI.DAG);
18989 case ARMISD::CMPZ:
18990 return PerformCMPZCombine(N, DCI.DAG);
18991 case ARMISD::CSINC:
18992 case ARMISD::CSINV:
18993 case ARMISD::CSNEG:
18994 return PerformCSETCombine(N, DCI.DAG);
18995 case ISD::LOAD:
18996 return PerformLOADCombine(N, DCI, Subtarget);
18997 case ARMISD::VLD1DUP:
18998 case ARMISD::VLD2DUP:
18999 case ARMISD::VLD3DUP:
19000 case ARMISD::VLD4DUP:
19001 return PerformVLDCombine(N, DCI);
19002 case ARMISD::BUILD_VECTOR:
19003 return PerformARMBUILD_VECTORCombine(N, DCI);
19004 case ISD::BITCAST:
19005 return PerformBITCASTCombine(N, DCI, Subtarget);
19006 case ARMISD::PREDICATE_CAST:
19007 return PerformPREDICATE_CASTCombine(N, DCI);
19008 case ARMISD::VECTOR_REG_CAST:
19009 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
19010 case ARMISD::MVETRUNC:
19011 return PerformMVETruncCombine(N, DCI);
19012 case ARMISD::MVESEXT:
19013 case ARMISD::MVEZEXT:
19014 return PerformMVEExtCombine(N, DCI);
19015 case ARMISD::VCMP:
19016 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
19017 case ISD::VECREDUCE_ADD:
19018 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
19019 case ARMISD::VADDVs:
19020 case ARMISD::VADDVu:
19021 case ARMISD::VADDLVs:
19022 case ARMISD::VADDLVu:
19023 case ARMISD::VADDLVAs:
19024 case ARMISD::VADDLVAu:
19025 case ARMISD::VMLAVs:
19026 case ARMISD::VMLAVu:
19027 case ARMISD::VMLALVs:
19028 case ARMISD::VMLALVu:
19029 case ARMISD::VMLALVAs:
19030 case ARMISD::VMLALVAu:
19031 return PerformReduceShuffleCombine(N, DCI.DAG);
19032 case ARMISD::VMOVN:
19033 return PerformVMOVNCombine(N, DCI);
19034 case ARMISD::VQMOVNs:
19035 case ARMISD::VQMOVNu:
19036 return PerformVQMOVNCombine(N, DCI);
19037 case ARMISD::VQDMULH:
19038 return PerformVQDMULHCombine(N, DCI);
19039 case ARMISD::ASRL:
19040 case ARMISD::LSRL:
19041 case ARMISD::LSLL:
19042 return PerformLongShiftCombine(N, DCI.DAG);
19043 case ARMISD::SMULWB: {
19044 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19045 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19046 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19047 return SDValue();
19048 break;
19049 }
19050 case ARMISD::SMULWT: {
19051 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19052 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19053 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19054 return SDValue();
19055 break;
19056 }
19057 case ARMISD::SMLALBB:
19058 case ARMISD::QADD16b:
19059 case ARMISD::QSUB16b:
19060 case ARMISD::UQADD16b:
19061 case ARMISD::UQSUB16b: {
19062 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19063 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19064 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19065 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19066 return SDValue();
19067 break;
19068 }
19069 case ARMISD::SMLALBT: {
19070 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19071 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19072 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19073 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19074 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19075 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19076 return SDValue();
19077 break;
19078 }
19079 case ARMISD::SMLALTB: {
19080 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19081 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19082 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19083 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19084 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19085 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19086 return SDValue();
19087 break;
19088 }
19089 case ARMISD::SMLALTT: {
19090 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19091 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19092 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19093 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19094 return SDValue();
19095 break;
19096 }
19097 case ARMISD::QADD8b:
19098 case ARMISD::QSUB8b:
19099 case ARMISD::UQADD8b:
19100 case ARMISD::UQSUB8b: {
19101 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19102 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19103 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19104 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19105 return SDValue();
19106 break;
19107 }
19108 case ARMISD::VBSP:
19109 if (N->getOperand(1) == N->getOperand(2))
19110 return N->getOperand(1);
19111 return SDValue();
19112 case ISD::INTRINSIC_VOID:
19113 case ISD::INTRINSIC_W_CHAIN:
19114 switch (N->getConstantOperandVal(1)) {
19115 case Intrinsic::arm_neon_vld1:
19116 case Intrinsic::arm_neon_vld1x2:
19117 case Intrinsic::arm_neon_vld1x3:
19118 case Intrinsic::arm_neon_vld1x4:
19119 case Intrinsic::arm_neon_vld2:
19120 case Intrinsic::arm_neon_vld3:
19121 case Intrinsic::arm_neon_vld4:
19122 case Intrinsic::arm_neon_vld2lane:
19123 case Intrinsic::arm_neon_vld3lane:
19124 case Intrinsic::arm_neon_vld4lane:
19125 case Intrinsic::arm_neon_vld2dup:
19126 case Intrinsic::arm_neon_vld3dup:
19127 case Intrinsic::arm_neon_vld4dup:
19128 case Intrinsic::arm_neon_vst1:
19129 case Intrinsic::arm_neon_vst1x2:
19130 case Intrinsic::arm_neon_vst1x3:
19131 case Intrinsic::arm_neon_vst1x4:
19132 case Intrinsic::arm_neon_vst2:
19133 case Intrinsic::arm_neon_vst3:
19134 case Intrinsic::arm_neon_vst4:
19135 case Intrinsic::arm_neon_vst2lane:
19136 case Intrinsic::arm_neon_vst3lane:
19137 case Intrinsic::arm_neon_vst4lane:
19138 return PerformVLDCombine(N, DCI);
19139 case Intrinsic::arm_mve_vld2q:
19140 case Intrinsic::arm_mve_vld4q:
19141 case Intrinsic::arm_mve_vst2q:
19142 case Intrinsic::arm_mve_vst4q:
19143 return PerformMVEVLDCombine(N, DCI);
19144 default: break;
19145 }
19146 break;
19147 }
19148 return SDValue();
19149}
19150
19151 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19152 EVT VT) const {
19153 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19154}
19155
19156 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19157 Align Alignment,
19158 MachineMemOperand::Flags,
19159 unsigned *Fast) const {
19160 // Depends what it gets converted into if the type is weird.
19161 if (!VT.isSimple())
19162 return false;
19163
19164 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19165 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19166 auto Ty = VT.getSimpleVT().SimpleTy;
19167
19168 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19169 // Unaligned access can use (for example) LRDB, LRDH, LDR
19170 if (AllowsUnaligned) {
19171 if (Fast)
19172 *Fast = Subtarget->hasV7Ops();
19173 return true;
19174 }
19175 }
19176
19177 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19178 // For any little-endian targets with neon, we can support unaligned ld/st
19179 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19180 // A big-endian target may also explicitly support unaligned accesses
19181 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19182 if (Fast)
19183 *Fast = 1;
19184 return true;
19185 }
19186 }
19187
19188 if (!Subtarget->hasMVEIntegerOps())
19189 return false;
19190
19191 // These are for predicates
19192 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19193 Ty == MVT::v2i1)) {
19194 if (Fast)
19195 *Fast = 1;
19196 return true;
19197 }
19198
19199 // These are for truncated stores/narrowing loads. They are fine so long as
19200 // the alignment is at least the size of the item being loaded
19201 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19202 Alignment >= VT.getScalarSizeInBits() / 8) {
19203 if (Fast)
19204 *Fast = true;
19205 return true;
19206 }
19207
19208 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19209 // VSTRW.U32 all store the vector register in exactly the same format, and
19210 // differ only in the range of their immediate offset field and the required
19211 // alignment. So there is always a store that can be used, regardless of
19212 // actual type.
19213 //
19214 // For big endian, that is not the case, but we can still emit a (VSTRB.U8;
19215 // VREV64.8) pair and get the same effect. This will likely be better than
19216 // aligning the vector through the stack.
19217 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19218 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19219 Ty == MVT::v2f64) {
19220 if (Fast)
19221 *Fast = 1;
19222 return true;
19223 }
19224
19225 return false;
19226}
19227
19228
19229 EVT ARMTargetLowering::getOptimalMemOpType(
19230 const MemOp &Op, const AttributeList &FuncAttributes) const {
19231 // See if we can use NEON instructions for this...
19232 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19233 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19234 unsigned Fast;
19235 if (Op.size() >= 16 &&
19236 (Op.isAligned(Align(16)) ||
19237 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19238 MachineMemOperand::MONone, &Fast) &&
19239 Fast))) {
19240 return MVT::v2f64;
19241 } else if (Op.size() >= 8 &&
19242 (Op.isAligned(Align(8)) ||
19243 (allowsMisalignedMemoryAccesses(
19244 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19245 Fast))) {
19246 return MVT::f64;
19247 }
19248 }
19249
19250 // Let the target-independent logic figure it out.
19251 return MVT::Other;
19252}
19253
19254// 64-bit integers are split into their high and low parts and held in two
19255// different registers, so the trunc is free since the low register can just
19256// be used.
19257bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19258 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19259 return false;
19260 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19261 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19262 return (SrcBits == 64 && DestBits == 32);
19263}
19264
19265 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19266 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19267 !DstVT.isInteger())
19268 return false;
19269 unsigned SrcBits = SrcVT.getSizeInBits();
19270 unsigned DestBits = DstVT.getSizeInBits();
19271 return (SrcBits == 64 && DestBits == 32);
19272}
19273
19274 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19275 if (Val.getOpcode() != ISD::LOAD)
19276 return false;
19277
19278 EVT VT1 = Val.getValueType();
19279 if (!VT1.isSimple() || !VT1.isInteger() ||
19280 !VT2.isSimple() || !VT2.isInteger())
19281 return false;
19282
19283 switch (VT1.getSimpleVT().SimpleTy) {
19284 default: break;
19285 case MVT::i1:
19286 case MVT::i8:
19287 case MVT::i16:
19288 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19289 return true;
19290 }
19291
19292 return false;
19293}
19294
19295 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19296 if (!VT.isSimple())
19297 return false;
19298
19299 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19300 // negate values directly (fneg is free). So, we don't want to let the DAG
19301 // combiner rewrite fneg into xors and some other instructions. For f16 and
19302 // FullFP16 argument passing, some bitcast nodes may be introduced,
19303 // triggering this DAG combine rewrite, so we are avoiding that with this.
19304 switch (VT.getSimpleVT().SimpleTy) {
19305 default: break;
19306 case MVT::f16:
19307 return Subtarget->hasFullFP16();
19308 }
19309
19310 return false;
19311}
19312
19313Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19314 if (!Subtarget->hasMVEIntegerOps())
19315 return nullptr;
19316 Type *SVIType = SVI->getType();
19317 Type *ScalarType = SVIType->getScalarType();
19318
19319 if (ScalarType->isFloatTy())
19320 return Type::getInt32Ty(SVIType->getContext());
19321 if (ScalarType->isHalfTy())
19322 return Type::getInt16Ty(SVIType->getContext());
19323 return nullptr;
19324}
19325
19326bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19327 EVT VT = ExtVal.getValueType();
19328
19329 if (!isTypeLegal(VT))
19330 return false;
19331
19332 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19333 if (Ld->isExpandingLoad())
19334 return false;
19335 }
19336
19337 if (Subtarget->hasMVEIntegerOps())
19338 return true;
19339
19340 // Don't create a loadext if we can fold the extension into a wide/long
19341 // instruction.
19342 // If there's more than one user instruction, the loadext is desirable no
19343 // matter what. There can be two uses by the same instruction.
19344 if (ExtVal->use_empty() ||
19345 !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))
19346 return true;
19347
19348 SDNode *U = *ExtVal->user_begin();
19349 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19350 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19351 return false;
19352
19353 return true;
19354}
19355
19356bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19357 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19358 return false;
19359
19360 if (!isTypeLegal(EVT::getEVT(Ty1)))
19361 return false;
19362
19363 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19364
19365 // Assuming the caller doesn't have a zeroext or signext return parameter,
19366 // truncation all the way down to i1 is valid.
19367 return true;
19368}
19369
19370/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19371/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19372/// expanded to FMAs when this method returns true, otherwise fmuladd is
19373/// expanded to fmul + fadd.
19374///
19375/// ARM supports both fused and unfused multiply-add operations; we already
19376/// lower a pair of fmul and fadd to the latter so it's not clear that there
19377/// would be a gain or that the gain would be worthwhile enough to risk
19378/// correctness bugs.
19379///
19380/// For MVE, we set this to true as it helps simplify the need for some
19381/// patterns (and we don't have the non-fused floating point instruction).
19382bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19383 EVT VT) const {
19384 if (Subtarget->useSoftFloat())
19385 return false;
19386
19387 if (!VT.isSimple())
19388 return false;
19389
19390 switch (VT.getSimpleVT().SimpleTy) {
19391 case MVT::v4f32:
19392 case MVT::v8f16:
19393 return Subtarget->hasMVEFloatOps();
19394 case MVT::f16:
19395 return Subtarget->useFPVFMx16();
19396 case MVT::f32:
19397 return Subtarget->useFPVFMx();
19398 case MVT::f64:
19399 return Subtarget->useFPVFMx64();
19400 default:
19401 break;
19402 }
19403
19404 return false;
19405}
19406
19407static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19408 if (V < 0)
19409 return false;
19410
19411 unsigned Scale = 1;
19412 switch (VT.getSimpleVT().SimpleTy) {
19413 case MVT::i1:
19414 case MVT::i8:
19415 // Scale == 1;
19416 break;
19417 case MVT::i16:
19418 // Scale == 2;
19419 Scale = 2;
19420 break;
19421 default:
19422 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19423 // Scale == 4;
19424 Scale = 4;
19425 break;
19426 }
19427
19428 if ((V & (Scale - 1)) != 0)
19429 return false;
19430 return isUInt<5>(V / Scale);
19431}
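// Editorial sketch, not part of the original source: Thumb-1 offsets are an
// unsigned 5-bit field scaled by the access size, i.e. 0-31 bytes for byte
// loads, 0-62 for halfwords and 0-124 for words and larger. The helper name
// is illustrative only.
static bool isLegalThumb1Offset(long long V, unsigned AccessBytes) {
  return V >= 0 && V % AccessBytes == 0 && (V / AccessBytes) < 32;
}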
19432
19433static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19434 const ARMSubtarget *Subtarget) {
19435 if (!VT.isInteger() && !VT.isFloatingPoint())
19436 return false;
19437 if (VT.isVector() && Subtarget->hasNEON())
19438 return false;
19439 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19440 !Subtarget->hasMVEFloatOps())
19441 return false;
19442
19443 bool IsNeg = false;
19444 if (V < 0) {
19445 IsNeg = true;
19446 V = -V;
19447 }
19448
19449 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19450
19451 // MVE: size * imm7
19452 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19453 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19454 case MVT::i32:
19455 case MVT::f32:
19456 return isShiftedUInt<7,2>(V);
19457 case MVT::i16:
19458 case MVT::f16:
19459 return isShiftedUInt<7,1>(V);
19460 case MVT::i8:
19461 return isUInt<7>(V);
19462 default:
19463 return false;
19464 }
19465 }
19466
19467 // half VLDR: 2 * imm8
19468 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19469 return isShiftedUInt<8, 1>(V);
19470 // VLDR and LDRD: 4 * imm8
19471 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19472 return isShiftedUInt<8, 2>(V);
19473
19474 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19475 // + imm12 or - imm8
19476 if (IsNeg)
19477 return isUInt<8>(V);
19478 return isUInt<12>(V);
19479 }
19480
19481 return false;
19482}
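// Editorial sketch, not part of the original source: the non-vector Thumb-2
// ranges above restated as plain checks (the half-precision 2 * imm8 VLDR
// case is omitted for brevity; the helper name is illustrative only).
static bool isLegalT2ScalarOffset(long long V, bool IsVLDRorLDRD) {
  if (IsVLDRorLDRD) // VLDR / LDRD use +/- (imm8 << 2), i.e. +/-1020 in steps of 4.
    return V % 4 == 0 && V >= -1020 && V <= 1020;
  if (V < 0)        // Byte/halfword/word negative offsets use an 8-bit field.
    return V >= -255;
  return V <= 4095; // Byte/halfword/word positive offsets use a 12-bit field.
}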
19483
19484/// isLegalAddressImmediate - Return true if the integer value can be used
19485/// as the offset of the target addressing mode for load / store of the
19486/// given type.
19487static bool isLegalAddressImmediate(int64_t V, EVT VT,
19488 const ARMSubtarget *Subtarget) {
19489 if (V == 0)
19490 return true;
19491
19492 if (!VT.isSimple())
19493 return false;
19494
19495 if (Subtarget->isThumb1Only())
19496 return isLegalT1AddressImmediate(V, VT);
19497 else if (Subtarget->isThumb2())
19498 return isLegalT2AddressImmediate(V, VT, Subtarget);
19499
19500 // ARM mode.
19501 if (V < 0)
19502 V = - V;
19503 switch (VT.getSimpleVT().SimpleTy) {
19504 default: return false;
19505 case MVT::i1:
19506 case MVT::i8:
19507 case MVT::i32:
19508 // +- imm12
19509 return isUInt<12>(V);
19510 case MVT::i16:
19511 // +- imm8
19512 return isUInt<8>(V);
19513 case MVT::f32:
19514 case MVT::f64:
19515 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19516 return false;
19517 return isShiftedUInt<8, 2>(V);
19518 }
19519}
19520
19521bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19522 EVT VT) const {
19523 int Scale = AM.Scale;
19524 if (Scale < 0)
19525 return false;
19526
19527 switch (VT.getSimpleVT().SimpleTy) {
19528 default: return false;
19529 case MVT::i1:
19530 case MVT::i8:
19531 case MVT::i16:
19532 case MVT::i32:
19533 if (Scale == 1)
19534 return true;
19535 // r + r << imm
19536 Scale = Scale & ~1;
19537 return Scale == 2 || Scale == 4 || Scale == 8;
19538 case MVT::i64:
19539 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19540 // version in Thumb mode.
19541 // r + r
19542 if (Scale == 1)
19543 return true;
19544 // r * 2 (this can be lowered to r + r).
19545 if (!AM.HasBaseReg && Scale == 2)
19546 return true;
19547 return false;
19548 case MVT::isVoid:
19549 // Note, we allow "void" uses (basically, uses that aren't loads or
19550 // stores), because ARM allows folding a scale into many arithmetic
19551 // operations. This should be made more precise and revisited later.
19552
19553 // Allow r << imm, but the imm has to be a multiple of two.
19554 if (Scale & 1) return false;
19555 return isPowerOf2_32(Scale);
19556 }
19557}
19558
19559bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19560 EVT VT) const {
19561 const int Scale = AM.Scale;
19562
19563 // Negative scales are not supported in Thumb1.
19564 if (Scale < 0)
19565 return false;
19566
19567 // Thumb1 addressing modes do not support register scaling excepting the
19568 // following cases:
19569 // 1. Scale == 1 means no scaling.
19570 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19571 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19572}
19573
19574/// isLegalAddressingMode - Return true if the addressing mode represented
19575/// by AM is legal for this target, for a load/store of the specified type.
19576bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19577 const AddrMode &AM, Type *Ty,
19578 unsigned AS, Instruction *I) const {
19579 EVT VT = getValueType(DL, Ty, true);
19580 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19581 return false;
19582
19583 // Can never fold addr of global into load/store.
19584 if (AM.BaseGV)
19585 return false;
19586
19587 switch (AM.Scale) {
19588 case 0: // no scale reg, must be "r+i" or "r", or "i".
19589 break;
19590 default:
19591 // ARM doesn't support any R+R*scale+imm addr modes.
19592 if (AM.BaseOffs)
19593 return false;
19594
19595 if (!VT.isSimple())
19596 return false;
19597
19598 if (Subtarget->isThumb1Only())
19599 return isLegalT1ScaledAddressingMode(AM, VT);
19600
19601 if (Subtarget->isThumb2())
19602 return isLegalT2ScaledAddressingMode(AM, VT);
19603
19604 int Scale = AM.Scale;
19605 switch (VT.getSimpleVT().SimpleTy) {
19606 default: return false;
19607 case MVT::i1:
19608 case MVT::i8:
19609 case MVT::i32:
19610 if (Scale < 0) Scale = -Scale;
19611 if (Scale == 1)
19612 return true;
19613 // r + r << imm
19614 return isPowerOf2_32(Scale & ~1);
19615 case MVT::i16:
19616 case MVT::i64:
19617 // r +/- r
19618 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19619 return true;
19620 // r * 2 (this can be lowered to r + r).
19621 if (!AM.HasBaseReg && Scale == 2)
19622 return true;
19623 return false;
19624
19625 case MVT::isVoid:
19626 // Note, we allow "void" uses (basically, uses that aren't loads or
19627 // stores), because ARM allows folding a scale into many arithmetic
19628 // operations. This should be made more precise and revisited later.
19629
19630 // Allow r << imm, but the imm has to be a multiple of two.
19631 if (Scale & 1) return false;
19632 return isPowerOf2_32(Scale);
19633 }
19634 }
19635 return true;
19636}
19637
19638/// isLegalICmpImmediate - Return true if the specified immediate is legal
19639/// icmp immediate, that is the target has icmp instructions which can compare
19640/// a register against the immediate without having to materialize the
19641/// immediate into a register.
19642bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19643 // Thumb2 and ARM modes can use cmn for negative immediates.
19644 if (!Subtarget->isThumb())
19645 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19646 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19647 if (Subtarget->isThumb2())
19648 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19649 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19650 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19651 return Imm >= 0 && Imm <= 255;
19652}
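// Editorial sketch, not part of the original source: an ARM-mode "modified
// immediate" (what ARM_AM::getSOImmVal accepts) is an 8-bit value rotated
// right by an even amount; CMP covers a legal +Imm and CMN a legal -Imm,
// which is why both signs are tried above. The helper name is illustrative.
static bool isARMModifiedImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Undo a rotate-right-by-Rot encoding by rotating left by Rot.
    uint32_t Undone = Rot == 0 ? V : ((V << Rot) | (V >> (32 - Rot)));
    if (Undone <= 0xFF)
      return true;
  }
  return false;
}
// For example, isARMModifiedImm(0xFF000000) is true, isARMModifiedImm(0x101)
// is false, so comparing against 0x101 needs the constant materialized first.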
19653
19654/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19655/// *or sub* immediate, that is the target has add or sub instructions which can
19656/// add a register with the immediate without having to materialize the
19657/// immediate into a register.
19658bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19659 // Same encoding for add/sub, just flip the sign.
19660 int64_t AbsImm = std::abs(Imm);
19661 if (!Subtarget->isThumb())
19662 return ARM_AM::getSOImmVal(AbsImm) != -1;
19663 if (Subtarget->isThumb2())
19664 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19665 // Thumb1 only has 8-bit unsigned immediate.
19666 return AbsImm >= 0 && AbsImm <= 255;
19667}
19668
19669// Return false to prevent folding
19670// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19671// if the folding leads to worse code.
19672bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19673 SDValue ConstNode) const {
19674 // Let the DAGCombiner decide for vector types and large types.
19675 const EVT VT = AddNode.getValueType();
19676 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19677 return true;
19678
19679 // It is worse if c0 is legal add immediate, while c1*c0 is not
19680 // and has to be composed by at least two instructions.
19681 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19682 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19683 const int64_t C0 = C0Node->getSExtValue();
19684 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19685 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19686 return true;
19687 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19688 return false;
19689
19690 // Default to true and let the DAGCombiner decide.
19691 return true;
19692}
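// Editorial worked example, not part of the original source: with c0 = 0xFF
// and c1 = 0x10 the folded constant c0*c1 = 0xFF0 is still a one-instruction
// immediate, so the fold is left to the DAGCombiner. With c0 = 0xFF and
// c1 = 0x1001, c0*c1 = 0xFF0FF takes more than one instruction to materialize
// while c0 itself was a legal add immediate, so the hook above returns false
// and the cheap (add r, 0xFF) operand is kept out of the multiply.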
19693
19694static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19695 bool isSEXTLoad, SDValue &Base,
19696 SDValue &Offset, bool &isInc,
19697 SelectionDAG &DAG) {
19698 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19699 return false;
19700
19701 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19702 // AddressingMode 3
19703 Base = Ptr->getOperand(0);
19704 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19705 int RHSC = (int)RHS->getZExtValue();
19706 if (RHSC < 0 && RHSC > -256) {
19707 assert(Ptr->getOpcode() == ISD::ADD);
19708 isInc = false;
19709 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19710 return true;
19711 }
19712 }
19713 isInc = (Ptr->getOpcode() == ISD::ADD);
19714 Offset = Ptr->getOperand(1);
19715 return true;
19716 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19717 // AddressingMode 2
19718 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19719 int RHSC = (int)RHS->getZExtValue();
19720 if (RHSC < 0 && RHSC > -0x1000) {
19721 assert(Ptr->getOpcode() == ISD::ADD);
19722 isInc = false;
19723 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19724 Base = Ptr->getOperand(0);
19725 return true;
19726 }
19727 }
19728
19729 if (Ptr->getOpcode() == ISD::ADD) {
19730 isInc = true;
19731 ARM_AM::ShiftOpc ShOpcVal=
19732 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19733 if (ShOpcVal != ARM_AM::no_shift) {
19734 Base = Ptr->getOperand(1);
19735 Offset = Ptr->getOperand(0);
19736 } else {
19737 Base = Ptr->getOperand(0);
19738 Offset = Ptr->getOperand(1);
19739 }
19740 return true;
19741 }
19742
19743 isInc = (Ptr->getOpcode() == ISD::ADD);
19744 Base = Ptr->getOperand(0);
19745 Offset = Ptr->getOperand(1);
19746 return true;
19747 }
19748
19749 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19750 return false;
19751}
19752
19753static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19754 bool isSEXTLoad, SDValue &Base,
19755 SDValue &Offset, bool &isInc,
19756 SelectionDAG &DAG) {
19757 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19758 return false;
19759
19760 Base = Ptr->getOperand(0);
19761 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19762 int RHSC = (int)RHS->getZExtValue();
19763 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19764 assert(Ptr->getOpcode() == ISD::ADD);
19765 isInc = false;
19766 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19767 return true;
19768 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19769 isInc = Ptr->getOpcode() == ISD::ADD;
19770 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19771 return true;
19772 }
19773 }
19774
19775 return false;
19776}
19777
19778static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19779 bool isSEXTLoad, bool IsMasked, bool isLE,
19780 SDValue &Base, SDValue &Offset,
19781 bool &isInc, SelectionDAG &DAG) {
19782 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19783 return false;
19784 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19785 return false;
19786
19787 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19788 // as opposed to a vldrw.32). This can allow extra addressing modes or
19789 // alignments for what is otherwise an equivalent instruction.
19790 bool CanChangeType = isLE && !IsMasked;
19791
19792 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19793 int RHSC = (int)RHS->getZExtValue();
19794
19795 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19796 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19797 assert(Ptr->getOpcode() == ISD::ADD);
19798 isInc = false;
19799 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19800 return true;
19801 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19802 isInc = Ptr->getOpcode() == ISD::ADD;
19803 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19804 return true;
19805 }
19806 return false;
19807 };
19808
19809 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19810 // (in BE/masked) type.
19811 Base = Ptr->getOperand(0);
19812 if (VT == MVT::v4i16) {
19813 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19814 return true;
19815 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19816 if (IsInRange(RHSC, 0x80, 1))
19817 return true;
19818 } else if (Alignment >= 4 &&
19819 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19820 IsInRange(RHSC, 0x80, 4))
19821 return true;
19822 else if (Alignment >= 2 &&
19823 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19824 IsInRange(RHSC, 0x80, 2))
19825 return true;
19826 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19827 return true;
19828 return false;
19829}
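// Editorial sketch, not part of the original source: the IsInRange check
// above in isolation. MVE pre/post-indexed forms take a non-zero offset that
// is a multiple of the access size and whose scaled value fits in a signed
// 7-bit field. The helper name is illustrative only.
static bool isLegalMVEIndexedOffset(int Offset, int AccessBytes) {
  if (Offset == 0 || Offset % AccessBytes != 0)
    return false;
  int Scaled = Offset / AccessBytes;
  return Scaled > -0x80 && Scaled < 0x80;
}
// For example, a halfword post-increment of +254 (127 halfwords) is accepted,
// while +256 (128 halfwords) is not.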
19830
19831/// getPreIndexedAddressParts - returns true by value, base pointer and
19832/// offset pointer and addressing mode by reference if the node's address
19833/// can be legally represented as pre-indexed load / store address.
19834bool
19835ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19836 SDValue &Offset,
19837 ISD::MemIndexedMode &AM,
19838 SelectionDAG &DAG) const {
19839 if (Subtarget->isThumb1Only())
19840 return false;
19841
19842 EVT VT;
19843 SDValue Ptr;
19844 Align Alignment;
19845 bool isSEXTLoad = false;
19846 bool IsMasked = false;
19847 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19848 Ptr = LD->getBasePtr();
19849 VT = LD->getMemoryVT();
19850 Alignment = LD->getAlign();
19851 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19852 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19853 Ptr = ST->getBasePtr();
19854 VT = ST->getMemoryVT();
19855 Alignment = ST->getAlign();
19856 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19857 Ptr = LD->getBasePtr();
19858 VT = LD->getMemoryVT();
19859 Alignment = LD->getAlign();
19860 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19861 IsMasked = true;
19862 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19863 Ptr = ST->getBasePtr();
19864 VT = ST->getMemoryVT();
19865 Alignment = ST->getAlign();
19866 IsMasked = true;
19867 } else
19868 return false;
19869
19870 bool isInc;
19871 bool isLegal = false;
19872 if (VT.isVector())
19873 isLegal = Subtarget->hasMVEIntegerOps() &&
19874 getMVEIndexedAddressParts(
19875 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19876 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19877 else {
19878 if (Subtarget->isThumb2())
19879 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19880 Offset, isInc, DAG);
19881 else
19882 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19883 Offset, isInc, DAG);
19884 }
19885 if (!isLegal)
19886 return false;
19887
19888 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19889 return true;
19890}
19891
19892/// getPostIndexedAddressParts - returns true by value, base pointer and
19893/// offset pointer and addressing mode by reference if this node can be
19894/// combined with a load / store to form a post-indexed load / store.
19895bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19896 SDValue &Base,
19897 SDValue &Offset,
19898 ISD::MemIndexedMode &AM,
19899 SelectionDAG &DAG) const {
19900 EVT VT;
19901 SDValue Ptr;
19902 Align Alignment;
19903 bool isSEXTLoad = false, isNonExt;
19904 bool IsMasked = false;
19905 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19906 VT = LD->getMemoryVT();
19907 Ptr = LD->getBasePtr();
19908 Alignment = LD->getAlign();
19909 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19910 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19911 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19912 VT = ST->getMemoryVT();
19913 Ptr = ST->getBasePtr();
19914 Alignment = ST->getAlign();
19915 isNonExt = !ST->isTruncatingStore();
19916 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19917 VT = LD->getMemoryVT();
19918 Ptr = LD->getBasePtr();
19919 Alignment = LD->getAlign();
19920 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19921 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19922 IsMasked = true;
19923 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19924 VT = ST->getMemoryVT();
19925 Ptr = ST->getBasePtr();
19926 Alignment = ST->getAlign();
19927 isNonExt = !ST->isTruncatingStore();
19928 IsMasked = true;
19929 } else
19930 return false;
19931
19932 if (Subtarget->isThumb1Only()) {
19933 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19934 // must be non-extending/truncating, i32, with an offset of 4.
19935 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19936 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19937 return false;
19938 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19939 if (!RHS || RHS->getZExtValue() != 4)
19940 return false;
19941 if (Alignment < Align(4))
19942 return false;
19943
19944 Offset = Op->getOperand(1);
19945 Base = Op->getOperand(0);
19946 AM = ISD::POST_INC;
19947 return true;
19948 }
19949
19950 bool isInc;
19951 bool isLegal = false;
19952 if (VT.isVector())
19953 isLegal = Subtarget->hasMVEIntegerOps() &&
19954 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19955 Subtarget->isLittle(), Base, Offset,
19956 isInc, DAG);
19957 else {
19958 if (Subtarget->isThumb2())
19959 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19960 isInc, DAG);
19961 else
19962 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19963 isInc, DAG);
19964 }
19965 if (!isLegal)
19966 return false;
19967
19968 if (Ptr != Base) {
19969 // Swap base ptr and offset to catch more post-index load / store when
19970 // it's legal. In Thumb2 mode, offset must be an immediate.
19971 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19972 !Subtarget->isThumb2())
19973 std::swap(Base, Offset);
19974
19975 // Post-indexed load / store update the base pointer.
19976 if (Ptr != Base)
19977 return false;
19978 }
19979
19980 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19981 return true;
19982}
19983
19984void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
19985 KnownBits &Known,
19986 const APInt &DemandedElts,
19987 const SelectionDAG &DAG,
19988 unsigned Depth) const {
19989 unsigned BitWidth = Known.getBitWidth();
19990 Known.resetAll();
19991 switch (Op.getOpcode()) {
19992 default: break;
19993 case ARMISD::ADDC:
19994 case ARMISD::ADDE:
19995 case ARMISD::SUBC:
19996 case ARMISD::SUBE:
19997 // Special cases when we convert a carry to a boolean.
19998 if (Op.getResNo() == 0) {
19999 SDValue LHS = Op.getOperand(0);
20000 SDValue RHS = Op.getOperand(1);
20001 // (ADDE 0, 0, C) will give us a single bit.
20002 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20003 isNullConstant(RHS)) {
20004 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20005 return;
20006 }
20007 }
20008 break;
20009 case ARMISD::CMOV: {
20010 // Bits are known zero/one if known on the LHS and RHS.
20011 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20012 if (Known.isUnknown())
20013 return;
20014
20015 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20016 Known = Known.intersectWith(KnownRHS);
20017 return;
20018 }
20019 case ISD::INTRINSIC_W_CHAIN: {
20020 Intrinsic::ID IntID =
20021 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20022 switch (IntID) {
20023 default: return;
20024 case Intrinsic::arm_ldaex:
20025 case Intrinsic::arm_ldrex: {
20026 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20027 unsigned MemBits = VT.getScalarSizeInBits();
20028 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20029 return;
20030 }
20031 }
20032 }
20033 case ARMISD::BFI: {
20034 // Conservatively, we can recurse down the first operand
20035 // and just mask out all affected bits.
20036 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20037
20038 // The operand to BFI is already a mask suitable for removing the bits it
20039 // sets.
20040 const APInt &Mask = Op.getConstantOperandAPInt(2);
20041 Known.Zero &= Mask;
20042 Known.One &= Mask;
20043 return;
20044 }
20045 case ARMISD::VGETLANEs:
20046 case ARMISD::VGETLANEu: {
20047 const SDValue &SrcSV = Op.getOperand(0);
20048 EVT VecVT = SrcSV.getValueType();
20049 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20050 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20051 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20052 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20053 "VGETLANE index out of bounds");
20054 unsigned Idx = Pos->getZExtValue();
20055 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20056 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20057
20058 EVT VT = Op.getValueType();
20059 const unsigned DstSz = VT.getScalarSizeInBits();
20060 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20061 (void)SrcSz;
20062 assert(SrcSz == Known.getBitWidth());
20063 assert(DstSz > SrcSz);
20064 if (Op.getOpcode() == ARMISD::VGETLANEs)
20065 Known = Known.sext(DstSz);
20066 else {
20067 Known = Known.zext(DstSz);
20068 }
20069 assert(DstSz == Known.getBitWidth());
20070 break;
20071 }
20072 case ARMISD::VMOVrh: {
20073 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20074 assert(KnownOp.getBitWidth() == 16);
20075 Known = KnownOp.zext(32);
20076 break;
20077 }
20078 case ARMISD::CSINC:
20079 case ARMISD::CSINV:
20080 case ARMISD::CSNEG: {
20081 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20082 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20083
20084 // The result is either:
20085 // CSINC: KnownOp0 or KnownOp1 + 1
20086 // CSINV: KnownOp0 or ~KnownOp1
20087 // CSNEG: KnownOp0 or KnownOp1 * -1
20088 if (Op.getOpcode() == ARMISD::CSINC)
20089 KnownOp1 =
20090 KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20091 else if (Op.getOpcode() == ARMISD::CSINV)
20092 std::swap(KnownOp1.Zero, KnownOp1.One);
20093 else if (Op.getOpcode() == ARMISD::CSNEG)
20094 KnownOp1 = KnownBits::mul(KnownOp1,
20095 KnownBits::makeConstant(APInt(32, -1)));
20096
20097 Known = KnownOp0.intersectWith(KnownOp1);
20098 break;
20099 }
20100 }
20101}
20102
20103bool ARMTargetLowering::targetShrinkDemandedConstant(
20104 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20105 TargetLoweringOpt &TLO) const {
20106 // Delay optimization, so we don't have to deal with illegal types, or block
20107 // optimizations.
20108 if (!TLO.LegalOps)
20109 return false;
20110
20111 // Only optimize AND for now.
20112 if (Op.getOpcode() != ISD::AND)
20113 return false;
20114
20115 EVT VT = Op.getValueType();
20116
20117 // Ignore vectors.
20118 if (VT.isVector())
20119 return false;
20120
20121 assert(VT == MVT::i32 && "Unexpected integer type");
20122
20123 // Make sure the RHS really is a constant.
20124 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20125 if (!C)
20126 return false;
20127
20128 unsigned Mask = C->getZExtValue();
20129
20130 unsigned Demanded = DemandedBits.getZExtValue();
20131 unsigned ShrunkMask = Mask & Demanded;
20132 unsigned ExpandedMask = Mask | ~Demanded;
20133
20134 // If the mask is all zeros, let the target-independent code replace the
20135 // result with zero.
20136 if (ShrunkMask == 0)
20137 return false;
20138
20139 // If the mask is all ones, erase the AND. (Currently, the target-independent
20140 // code won't do this, so we have to do it explicitly to avoid an infinite
20141 // loop in obscure cases.)
20142 if (ExpandedMask == ~0U)
20143 return TLO.CombineTo(Op, Op.getOperand(0));
20144
20145 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20146 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20147 };
20148 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20149 if (NewMask == Mask)
20150 return true;
20151 SDLoc DL(Op);
20152 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20153 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20154 return TLO.CombineTo(Op, NewOp);
20155 };
20156
20157 // Prefer uxtb mask.
20158 if (IsLegalMask(0xFF))
20159 return UseMask(0xFF);
20160
20161 // Prefer uxth mask.
20162 if (IsLegalMask(0xFFFF))
20163 return UseMask(0xFFFF);
20164
20165 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20166 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20167 if (ShrunkMask < 256)
20168 return UseMask(ShrunkMask);
20169
20170 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20171 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20172 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20173 return UseMask(ExpandedMask);
20174
20175 // Potential improvements:
20176 //
20177 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20178 // We could try to prefer Thumb1 immediates which can be lowered to a
20179 // two-instruction sequence.
20180 // We could try to recognize more legal ARM/Thumb2 immediates here.
20181
20182 return false;
20183}
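// Editorial worked example, not part of the original source: for
// (and X, 0x00FF00FF) where only the low 16 bits are demanded,
//   ShrunkMask   = 0x00FF00FF & 0x0000FFFF = 0x000000FF
//   ExpandedMask = 0x00FF00FF | 0xFFFF0000 = 0xFFFF00FF
// 0xFF passes IsLegalMask, so the node is rewritten as (and X, 0xFF), which
// is a UXTB-friendly mask instead of a constant that needs movw+movt.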
20184
20185bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20186 SDValue Op, const APInt &OriginalDemandedBits,
20187 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20188 unsigned Depth) const {
20189 unsigned Opc = Op.getOpcode();
20190
20191 switch (Opc) {
20192 case ARMISD::ASRL:
20193 case ARMISD::LSRL: {
20194 // If this is result 0 and the other result is unused, see if the demand
20195 // bits allow us to shrink this long shift into a standard small shift in
20196 // the opposite direction.
20197 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20198 isa<ConstantSDNode>(Op->getOperand(2))) {
20199 unsigned ShAmt = Op->getConstantOperandVal(2);
20200 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20201 << (32 - ShAmt)))
20202 return TLO.CombineTo(
20203 Op, TLO.DAG.getNode(
20204 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20205 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20206 }
20207 break;
20208 }
20209 case ARMISD::VBICIMM: {
20210 SDValue Op0 = Op.getOperand(0);
20211 unsigned ModImm = Op.getConstantOperandVal(1);
20212 unsigned EltBits = 0;
20213 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20214 if ((OriginalDemandedBits & Mask) == 0)
20215 return TLO.CombineTo(Op, Op0);
20216 }
20217 }
20218
20219 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20220 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20221}
20222
20223//===----------------------------------------------------------------------===//
20224// ARM Inline Assembly Support
20225//===----------------------------------------------------------------------===//
20226
20227bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20228 // Looking for "rev" which is V6+.
20229 if (!Subtarget->hasV6Ops())
20230 return false;
20231
20232 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20233 StringRef AsmStr = IA->getAsmString();
20234 SmallVector<StringRef, 4> AsmPieces;
20235 SplitString(AsmStr, AsmPieces, ";\n");
20236
20237 switch (AsmPieces.size()) {
20238 default: return false;
20239 case 1:
20240 AsmStr = AsmPieces[0];
20241 AsmPieces.clear();
20242 SplitString(AsmStr, AsmPieces, " \t,");
20243
20244 // rev $0, $1
20245 if (AsmPieces.size() == 3 &&
20246 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20247 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20248 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20249 if (Ty && Ty->getBitWidth() == 32)
20250 return IntrinsicLowering::LowerToByteSwap(CI);
20251 }
20252 break;
20253 }
20254
20255 return false;
20256}
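// Editorial example, not part of the original source: the pattern matched
// above corresponds to user code along the lines of (ARM target, GCC-style
// extended asm; clang encodes it as the IR asm string "rev $0, $1" with the
// constraint string "=l,l"):
//
//   static inline unsigned byteswap(unsigned X) {
//     unsigned R;
//     asm("rev %0, %1" : "=l"(R) : "l"(X));
//     return R;
//   }
//
// On ARMv6+ the opaque inline-asm block is replaced with a plain byte swap
// that the rest of the optimizer can reason about.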
20257
20258const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20259 // At this point, we have to lower this constraint to something else, so we
20260 // lower it to an "r" or "w". However, by doing this we will force the result
20261 // to be in register, while the X constraint is much more permissive.
20262 //
20263 // Although we are correct (we are free to emit anything, without
20264 // constraints), we might break use cases that would expect us to be more
20265 // efficient and emit something else.
20266 if (!Subtarget->hasVFP2Base())
20267 return "r";
20268 if (ConstraintVT.isFloatingPoint())
20269 return "w";
20270 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20271 (ConstraintVT.getSizeInBits() == 64 ||
20272 ConstraintVT.getSizeInBits() == 128))
20273 return "w";
20274
20275 return "r";
20276}
20277
20278/// getConstraintType - Given a constraint letter, return the type of
20279/// constraint it is for this target.
20280ARMTargetLowering::ConstraintType
20281ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20282 unsigned S = Constraint.size();
20283 if (S == 1) {
20284 switch (Constraint[0]) {
20285 default: break;
20286 case 'l': return C_RegisterClass;
20287 case 'w': return C_RegisterClass;
20288 case 'h': return C_RegisterClass;
20289 case 'x': return C_RegisterClass;
20290 case 't': return C_RegisterClass;
20291 case 'j': return C_Immediate; // Constant for movw.
20292 // An address with a single base register. Due to the way we
20293 // currently handle addresses it is the same as an 'r' memory constraint.
20294 case 'Q': return C_Memory;
20295 }
20296 } else if (S == 2) {
20297 switch (Constraint[0]) {
20298 default: break;
20299 case 'T': return C_RegisterClass;
20300 // All 'U+' constraints are addresses.
20301 case 'U': return C_Memory;
20302 }
20303 }
20304 return TargetLowering::getConstraintType(Constraint);
20305}
20306
20307/// Examine constraint type and operand type and determine a weight value.
20308/// This object must already have been set up with the operand type
20309/// and the current alternative constraint selected.
20310TargetLowering::ConstraintWeight
20311ARMTargetLowering::getSingleConstraintMatchWeight(
20312 AsmOperandInfo &info, const char *constraint) const {
20313 ConstraintWeight weight = CW_Invalid;
20314 Value *CallOperandVal = info.CallOperandVal;
20315 // If we don't have a value, we can't do a match,
20316 // but allow it at the lowest weight.
20317 if (!CallOperandVal)
20318 return CW_Default;
20319 Type *type = CallOperandVal->getType();
20320 // Look at the constraint type.
20321 switch (*constraint) {
20322 default:
20323 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20324 break;
20325 case 'l':
20326 if (type->isIntegerTy()) {
20327 if (Subtarget->isThumb())
20328 weight = CW_SpecificReg;
20329 else
20330 weight = CW_Register;
20331 }
20332 break;
20333 case 'w':
20334 if (type->isFloatingPointTy())
20335 weight = CW_Register;
20336 break;
20337 }
20338 return weight;
20339}
20340
20341using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20342
20343RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20344 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20345 switch (Constraint.size()) {
20346 case 1:
20347 // GCC ARM Constraint Letters
20348 switch (Constraint[0]) {
20349 case 'l': // Low regs or general regs.
20350 if (Subtarget->isThumb())
20351 return RCPair(0U, &ARM::tGPRRegClass);
20352 return RCPair(0U, &ARM::GPRRegClass);
20353 case 'h': // High regs or no regs.
20354 if (Subtarget->isThumb())
20355 return RCPair(0U, &ARM::hGPRRegClass);
20356 break;
20357 case 'r':
20358 if (Subtarget->isThumb1Only())
20359 return RCPair(0U, &ARM::tGPRRegClass);
20360 return RCPair(0U, &ARM::GPRRegClass);
20361 case 'w':
20362 if (VT == MVT::Other)
20363 break;
20364 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20365 return RCPair(0U, &ARM::SPRRegClass);
20366 if (VT.getSizeInBits() == 64)
20367 return RCPair(0U, &ARM::DPRRegClass);
20368 if (VT.getSizeInBits() == 128)
20369 return RCPair(0U, &ARM::QPRRegClass);
20370 break;
20371 case 'x':
20372 if (VT == MVT::Other)
20373 break;
20374 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20375 return RCPair(0U, &ARM::SPR_8RegClass);
20376 if (VT.getSizeInBits() == 64)
20377 return RCPair(0U, &ARM::DPR_8RegClass);
20378 if (VT.getSizeInBits() == 128)
20379 return RCPair(0U, &ARM::QPR_8RegClass);
20380 break;
20381 case 't':
20382 if (VT == MVT::Other)
20383 break;
20384 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20385 return RCPair(0U, &ARM::SPRRegClass);
20386 if (VT.getSizeInBits() == 64)
20387 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20388 if (VT.getSizeInBits() == 128)
20389 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20390 break;
20391 }
20392 break;
20393
20394 case 2:
20395 if (Constraint[0] == 'T') {
20396 switch (Constraint[1]) {
20397 default:
20398 break;
20399 case 'e':
20400 return RCPair(0U, &ARM::tGPREvenRegClass);
20401 case 'o':
20402 return RCPair(0U, &ARM::tGPROddRegClass);
20403 }
20404 }
20405 break;
20406
20407 default:
20408 break;
20409 }
20410
20411 if (StringRef("{cc}").equals_insensitive(Constraint))
20412 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20413
20414 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20415}
20416
20417/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20418/// vector. If it is invalid, don't add anything to Ops.
20419void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20420 StringRef Constraint,
20421 std::vector<SDValue> &Ops,
20422 SelectionDAG &DAG) const {
20423 SDValue Result;
20424
20425 // Currently only support length 1 constraints.
20426 if (Constraint.size() != 1)
20427 return;
20428
20429 char ConstraintLetter = Constraint[0];
20430 switch (ConstraintLetter) {
20431 default: break;
20432 case 'j':
20433 case 'I': case 'J': case 'K': case 'L':
20434 case 'M': case 'N': case 'O':
20435 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20436 if (!C)
20437 return;
20438
20439 int64_t CVal64 = C->getSExtValue();
20440 int CVal = (int) CVal64;
20441 // None of these constraints allow values larger than 32 bits. Check
20442 // that the value fits in an int.
20443 if (CVal != CVal64)
20444 return;
20445
20446 switch (ConstraintLetter) {
20447 case 'j':
20448 // Constant suitable for movw, must be between 0 and
20449 // 65535.
20450 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20451 if (CVal >= 0 && CVal <= 65535)
20452 break;
20453 return;
20454 case 'I':
20455 if (Subtarget->isThumb1Only()) {
20456 // This must be a constant between 0 and 255, for ADD
20457 // immediates.
20458 if (CVal >= 0 && CVal <= 255)
20459 break;
20460 } else if (Subtarget->isThumb2()) {
20461 // A constant that can be used as an immediate value in a
20462 // data-processing instruction.
20463 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20464 break;
20465 } else {
20466 // A constant that can be used as an immediate value in a
20467 // data-processing instruction.
20468 if (ARM_AM::getSOImmVal(CVal) != -1)
20469 break;
20470 }
20471 return;
20472
20473 case 'J':
20474 if (Subtarget->isThumb1Only()) {
20475 // This must be a constant between -255 and -1, for negated ADD
20476 // immediates. This can be used in GCC with an "n" modifier that
20477 // prints the negated value, for use with SUB instructions. It is
20478 // not useful otherwise but is implemented for compatibility.
20479 if (CVal >= -255 && CVal <= -1)
20480 break;
20481 } else {
20482 // This must be a constant between -4095 and 4095. It is not clear
20483 // what this constraint is intended for. Implemented for
20484 // compatibility with GCC.
20485 if (CVal >= -4095 && CVal <= 4095)
20486 break;
20487 }
20488 return;
20489
20490 case 'K':
20491 if (Subtarget->isThumb1Only()) {
20492 // A 32-bit value where only one byte has a nonzero value. Exclude
20493 // zero to match GCC. This constraint is used by GCC internally for
20494 // constants that can be loaded with a move/shift combination.
20495 // It is not useful otherwise but is implemented for compatibility.
20496 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20497 break;
20498 } else if (Subtarget->isThumb2()) {
20499 // A constant whose bitwise inverse can be used as an immediate
20500 // value in a data-processing instruction. This can be used in GCC
20501 // with a "B" modifier that prints the inverted value, for use with
20502 // BIC and MVN instructions. It is not useful otherwise but is
20503 // implemented for compatibility.
20504 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20505 break;
20506 } else {
20507 // A constant whose bitwise inverse can be used as an immediate
20508 // value in a data-processing instruction. This can be used in GCC
20509 // with a "B" modifier that prints the inverted value, for use with
20510 // BIC and MVN instructions. It is not useful otherwise but is
20511 // implemented for compatibility.
20512 if (ARM_AM::getSOImmVal(~CVal) != -1)
20513 break;
20514 }
20515 return;
20516
20517 case 'L':
20518 if (Subtarget->isThumb1Only()) {
20519 // This must be a constant between -7 and 7,
20520 // for 3-operand ADD/SUB immediate instructions.
20521 if (CVal >= -7 && CVal < 7)
20522 break;
20523 } else if (Subtarget->isThumb2()) {
20524 // A constant whose negation can be used as an immediate value in a
20525 // data-processing instruction. This can be used in GCC with an "n"
20526 // modifier that prints the negated value, for use with SUB
20527 // instructions. It is not useful otherwise but is implemented for
20528 // compatibility.
20529 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20530 break;
20531 } else {
20532 // A constant whose negation can be used as an immediate value in a
20533 // data-processing instruction. This can be used in GCC with an "n"
20534 // modifier that prints the negated value, for use with SUB
20535 // instructions. It is not useful otherwise but is implemented for
20536 // compatibility.
20537 if (ARM_AM::getSOImmVal(-CVal) != -1)
20538 break;
20539 }
20540 return;
20541
20542 case 'M':
20543 if (Subtarget->isThumb1Only()) {
20544 // This must be a multiple of 4 between 0 and 1020, for
20545 // ADD sp + immediate.
20546 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20547 break;
20548 } else {
20549 // A power of two or a constant between 0 and 32. This is used in
20550 // GCC for the shift amount on shifted register operands, but it is
20551 // useful in general for any shift amounts.
20552 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20553 break;
20554 }
20555 return;
20556
20557 case 'N':
20558 if (Subtarget->isThumb1Only()) {
20559 // This must be a constant between 0 and 31, for shift amounts.
20560 if (CVal >= 0 && CVal <= 31)
20561 break;
20562 }
20563 return;
20564
20565 case 'O':
20566 if (Subtarget->isThumb1Only()) {
20567 // This must be a multiple of 4 between -508 and 508, for
20568 // ADD/SUB sp = sp + immediate.
20569 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20570 break;
20571 }
20572 return;
20573 }
20574 Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20575 break;
20576 }
20577
20578 if (Result.getNode()) {
20579 Ops.push_back(Result);
20580 return;
20581 }
20582 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20583}
20584
20585static RTLIB::Libcall getDivRemLibcall(
20586 const SDNode *N, MVT::SimpleValueType SVT) {
20587 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20588 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20589 "Unhandled Opcode in getDivRemLibcall");
20590 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20591 N->getOpcode() == ISD::SREM;
20592 RTLIB::Libcall LC;
20593 switch (SVT) {
20594 default: llvm_unreachable("Unexpected request for libcall!");
20595 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20596 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20597 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20598 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20599 }
20600 return LC;
20601}
20602
20603static TargetLowering::ArgListTy getDivRemArgList(
20604 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20605 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20606 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20607 "Unhandled Opcode in getDivRemArgList");
20608 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20609 N->getOpcode() == ISD::SREM;
20610 TargetLowering::ArgListTy Args;
20611 TargetLowering::ArgListEntry Entry;
20612 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20613 EVT ArgVT = N->getOperand(i).getValueType();
20614 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20615 Entry.Node = N->getOperand(i);
20616 Entry.Ty = ArgTy;
20617 Entry.IsSExt = isSigned;
20618 Entry.IsZExt = !isSigned;
20619 Args.push_back(Entry);
20620 }
20621 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20622 std::swap(Args[0], Args[1]);
20623 return Args;
20624}
20625
20626SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20627 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20628 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20629 Subtarget->isTargetWindows()) &&
20630 "Register-based DivRem lowering only");
20631 unsigned Opcode = Op->getOpcode();
20632 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20633 "Invalid opcode for Div/Rem lowering");
20634 bool isSigned = (Opcode == ISD::SDIVREM);
20635 EVT VT = Op->getValueType(0);
20636 SDLoc dl(Op);
20637
20638 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20639 SmallVector<SDValue> Result;
20640 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20641 SDValue Res0 =
20642 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20643 SDValue Res1 =
20644 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20645 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20646 {Res0, Res1});
20647 }
20648 }
20649
20650 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20651
20652 // If the target has hardware divide, use divide + multiply + subtract:
20653 // div = a / b
20654 // rem = a - b * div
20655 // return {div, rem}
20656 // This should be lowered into UDIV/SDIV + MLS later on.
20657 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20658 : Subtarget->hasDivideInARMMode();
20659 if (hasDivide && Op->getValueType(0).isSimple() &&
20660 Op->getSimpleValueType(0) == MVT::i32) {
20661 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20662 const SDValue Dividend = Op->getOperand(0);
20663 const SDValue Divisor = Op->getOperand(1);
20664 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20665 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20666 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20667
20668 SDValue Values[2] = {Div, Rem};
20669 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20670 }
20671
20672 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20673 VT.getSimpleVT().SimpleTy);
20674 SDValue InChain = DAG.getEntryNode();
20675
20676 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20677 DAG.getContext(),
20678 Subtarget);
20679
20680 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20681 getPointerTy(DAG.getDataLayout()));
20682
20683 Type *RetTy = StructType::get(Ty, Ty);
20684
20685 if (Subtarget->isTargetWindows())
20686 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20687
20689 CLI.setDebugLoc(dl).setChain(InChain)
20690 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20691 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20692
20693 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20694 return CallInfo.first;
20695}
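// Editorial sketch, not part of the original source: the hardware-divide path
// above derives the remainder from the quotient, which later selects to
// SDIV/UDIV followed by MLS. The same identity in plain C++ (names are
// illustrative only):
static void divRemViaMls(int A, int B, int &Quot, int &Rem) {
  Quot = A / B;       // becomes SDIV (UDIV for the unsigned variant)
  Rem = A - B * Quot; // becomes MLS, the multiply-and-subtract
}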
20696
20697// Lowers REM using divmod helpers
20698// see RTABI section 4.2/4.3
20699SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20700 EVT VT = N->getValueType(0);
20701
20702 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20703 SmallVector<SDValue> Result;
20704 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20705 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20706 Result[0], Result[1]);
20707 }
20708
20709 // Build return types (div and rem)
20710 std::vector<Type*> RetTyParams;
20711 Type *RetTyElement;
20712
20713 switch (VT.getSimpleVT().SimpleTy) {
20714 default: llvm_unreachable("Unexpected request for libcall!");
20715 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20716 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20717 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20718 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20719 }
20720
20721 RetTyParams.push_back(RetTyElement);
20722 RetTyParams.push_back(RetTyElement);
20723 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20724 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20725
20726 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20727 SimpleTy);
20728 SDValue InChain = DAG.getEntryNode();
20729 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20730 Subtarget);
20731 bool isSigned = N->getOpcode() == ISD::SREM;
20732 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20733 getPointerTy(DAG.getDataLayout()));
20734
20735 if (Subtarget->isTargetWindows())
20736 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20737
20738 // Lower call
20739 CallLoweringInfo CLI(DAG);
20740 CLI.setChain(InChain)
20741 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20742 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20743 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20744
20745 // Return second (rem) result operand (first contains div)
20746 SDNode *ResNode = CallResult.first.getNode();
20747 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20748 return ResNode->getOperand(1);
20749}
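// Editorial sketch, not part of the original source: the RTABI divmod helpers
// called above hand back both results at once; LowerREM keeps only the second
// one. The shape of the returned value, with illustrative names:
struct DivModResult { int Quot; int Rem; };
static DivModResult softDivMod(int A, int B) { return {A / B, A % B}; }
static int remOnly(int A, int B) { return softDivMod(A, B).Rem; }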
20750
20751SDValue
20752ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20753 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20754 SDLoc DL(Op);
20755
20756 // Get the inputs.
20757 SDValue Chain = Op.getOperand(0);
20758 SDValue Size = Op.getOperand(1);
20759
20760 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20761 "no-stack-arg-probe")) {
20762 MaybeAlign Align =
20763 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20764 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20765 Chain = SP.getValue(1);
20766 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20767 if (Align)
20768 SP =
20769 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20770 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20771 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20772 SDValue Ops[2] = { SP, Chain };
20773 return DAG.getMergeValues(Ops, DL);
20774 }
20775
20776 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20777 DAG.getConstant(2, DL, MVT::i32));
20778
20779 SDValue Glue;
20780 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20781 Glue = Chain.getValue(1);
20782
20783 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20784 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20785
20786 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20787 Chain = NewSP.getValue(1);
20788
20789 SDValue Ops[2] = { NewSP, Chain };
20790 return DAG.getMergeValues(Ops, DL);
20791}
20792
20793SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20794 bool IsStrict = Op->isStrictFPOpcode();
20795 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20796 const unsigned DstSz = Op.getValueType().getSizeInBits();
20797 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20798 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20799 "Unexpected type for custom-lowering FP_EXTEND");
20800
20801 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20802 "With both FP DP and 16, any FP conversion is legal!");
20803
20804 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20805 "With FP16, 16 to 32 conversion is legal!");
20806
20807 // Converting from 32 -> 64 is valid if we have FP64.
20808 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20809 // FIXME: Remove this when we have strict fp instruction selection patterns
20810 if (IsStrict) {
20811 SDLoc Loc(Op);
20812 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20813 Loc, Op.getValueType(), SrcVal);
20814 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20815 }
20816 return Op;
20817 }
20818
20819 // Either we are converting from 16 -> 64 without FP16 and/or without
20820 // double-precision FP (or without Armv8 FP), so we must do it in two
20821 // steps.
20822 // Or we are converting from 32 -> 64 without double-precision FP, or from
20823 // 16 -> 32 without FP16, so we must make a libcall.
20824 SDLoc Loc(Op);
20825 RTLIB::Libcall LC;
20826 MakeLibCallOptions CallOptions;
20827 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20828 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20829 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20830 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20831 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20832 if (Supported) {
20833 if (IsStrict) {
20834 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20835 {DstVT, MVT::Other}, {Chain, SrcVal});
20836 Chain = SrcVal.getValue(1);
20837 } else {
20838 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20839 }
20840 } else {
20841 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20842 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20843 "Unexpected type for custom-lowering FP_EXTEND");
20844 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20845 Loc, Chain);
20846 }
20847 }
20848
20849 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20850}
20851
20852SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20853 bool IsStrict = Op->isStrictFPOpcode();
20854
20855 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20856 EVT SrcVT = SrcVal.getValueType();
20857 EVT DstVT = Op.getValueType();
20858 const unsigned DstSz = Op.getValueType().getSizeInBits();
20859 const unsigned SrcSz = SrcVT.getSizeInBits();
20860 (void)DstSz;
20861 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20862 "Unexpected type for custom-lowering FP_ROUND");
20863
20864 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20865 "With both FP DP and 16, any FP conversion is legal!");
20866
20867 SDLoc Loc(Op);
20868
20869 // A 32 -> 16 conversion is a single instruction if we have FP16.
20870 if (SrcSz == 32 && Subtarget->hasFP16())
20871 return Op;
20872
20873 // Lib call from 32 -> 16 / 64 -> [32, 16]
20874 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20875 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20876 "Unexpected type for custom-lowering FP_ROUND");
20877 MakeLibCallOptions CallOptions;
20878 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20879 SDValue Result;
20880 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20881 Loc, Chain);
20882 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20883}
20884
20885bool
20886ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20887 // The ARM target isn't yet aware of offsets.
20888 return false;
20889}
20890
20891bool ARM::isBitFieldInvertedMask(unsigned v) {
20892 if (v == 0xffffffff)
20893 return false;
20894
20895 // There can be 1's on either or both "outsides"; all the "inside"
20896 // bits must be 0's.
20897 return isShiftedMask_32(~v);
20898}
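// Editorial examples, not part of the original source: masks accepted above
// are complements of a single contiguous run of ones, e.g.
//   0xFF0000FF -> ~v = 0x00FFFF00, one shifted run  -> accepted
//   0xFFFF00FF -> ~v = 0x0000FF00, one shifted run  -> accepted
//   0xFF00FF00 -> ~v = 0x00FF00FF, two runs         -> rejected
// These are the kinds of masks the BFI/BFC-forming combines elsewhere in this
// file look for.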
20899
20900/// isFPImmLegal - Returns true if the target can instruction select the
20901/// specified FP immediate natively. If false, the legalizer will
20902/// materialize the FP immediate as a load from a constant pool.
20903bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20904 bool ForCodeSize) const {
20905 if (!Subtarget->hasVFP3Base())
20906 return false;
20907 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20908 return ARM_AM::getFP16Imm(Imm) != -1;
20909 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20910 ARM_AM::getFP32FP16Imm(Imm) != -1)
20911 return true;
20912 if (VT == MVT::f32)
20913 return ARM_AM::getFP32Imm(Imm) != -1;
20914 if (VT == MVT::f64 && Subtarget->hasFP64())
20915 return ARM_AM::getFP64Imm(Imm) != -1;
20916 return false;
20917}
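// Editorial sketch, not part of the original source: VFPv3 floating-point
// immediates are commonly described as +/- (n/16) * 2^r with n in [16, 31]
// and r in [-3, 4], i.e. magnitudes on a coarse grid from 0.125 to 31.0. A
// brute-force check under that assumption (names are illustrative only):
static bool looksLikeVFPImmediate(double D) {
  if (D < 0.0)
    D = -D;
  const double Pow2[] = {0.125, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 16.0};
  for (int N = 16; N <= 31; ++N)
    for (double P : Pow2)
      if (D == (N / 16.0) * P)
        return true;
  return false;
}
// For example 0.5, 3.0 and -10.0 qualify, while 0.1 and 100.0 do not.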
20918
20919/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20920/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20921/// specified in the intrinsic calls.
20922bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20923 const CallInst &I,
20924 MachineFunction &MF,
20925 unsigned Intrinsic) const {
20926 switch (Intrinsic) {
20927 case Intrinsic::arm_neon_vld1:
20928 case Intrinsic::arm_neon_vld2:
20929 case Intrinsic::arm_neon_vld3:
20930 case Intrinsic::arm_neon_vld4:
20931 case Intrinsic::arm_neon_vld2lane:
20932 case Intrinsic::arm_neon_vld3lane:
20933 case Intrinsic::arm_neon_vld4lane:
20934 case Intrinsic::arm_neon_vld2dup:
20935 case Intrinsic::arm_neon_vld3dup:
20936 case Intrinsic::arm_neon_vld4dup: {
20937 Info.opc = ISD::INTRINSIC_W_CHAIN;
20938 // Conservatively set memVT to the entire set of vectors loaded.
20939 auto &DL = I.getDataLayout();
20940 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20941 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20942 Info.ptrVal = I.getArgOperand(0);
20943 Info.offset = 0;
20944 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20945 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20946 // volatile loads with NEON intrinsics not supported
20947 Info.flags = MachineMemOperand::MOLoad;
20948 return true;
20949 }
20950 case Intrinsic::arm_neon_vld1x2:
20951 case Intrinsic::arm_neon_vld1x3:
20952 case Intrinsic::arm_neon_vld1x4: {
20953 Info.opc = ISD::INTRINSIC_W_CHAIN;
20954 // Conservatively set memVT to the entire set of vectors loaded.
20955 auto &DL = I.getDataLayout();
20956 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20957 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20958 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20959 Info.offset = 0;
20960 Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
20961 // volatile loads with NEON intrinsics not supported
20962 Info.flags = MachineMemOperand::MOLoad;
20963 return true;
20964 }
20965 case Intrinsic::arm_neon_vst1:
20966 case Intrinsic::arm_neon_vst2:
20967 case Intrinsic::arm_neon_vst3:
20968 case Intrinsic::arm_neon_vst4:
20969 case Intrinsic::arm_neon_vst2lane:
20970 case Intrinsic::arm_neon_vst3lane:
20971 case Intrinsic::arm_neon_vst4lane: {
20973 // Conservatively set memVT to the entire set of vectors stored.
20974 auto &DL = I.getDataLayout();
20975 unsigned NumElts = 0;
20976 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20977 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20978 if (!ArgTy->isVectorTy())
20979 break;
20980 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20981 }
20982 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20983 Info.ptrVal = I.getArgOperand(0);
20984 Info.offset = 0;
20985 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20986 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20987 // volatile stores with NEON intrinsics not supported
20989 return true;
20990 }
20991 case Intrinsic::arm_neon_vst1x2:
20992 case Intrinsic::arm_neon_vst1x3:
20993 case Intrinsic::arm_neon_vst1x4: {
20995 // Conservatively set memVT to the entire set of vectors stored.
20996 auto &DL = I.getDataLayout();
20997 unsigned NumElts = 0;
20998 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20999 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21000 if (!ArgTy->isVectorTy())
21001 break;
21002 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21003 }
21004 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21005 Info.ptrVal = I.getArgOperand(0);
21006 Info.offset = 0;
21007 Info.align = I.getParamAlign(0).valueOrOne();
21008 // volatile stores with NEON intrinsics not supported
21010 return true;
21011 }
21012 case Intrinsic::arm_mve_vld2q:
21013 case Intrinsic::arm_mve_vld4q: {
21015 // Conservatively set memVT to the entire set of vectors loaded.
21016 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21017 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21018 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21019 Info.ptrVal = I.getArgOperand(0);
21020 Info.offset = 0;
21021 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21022 // volatile loads with MVE intrinsics not supported
21024 return true;
21025 }
21026 case Intrinsic::arm_mve_vst2q:
21027 case Intrinsic::arm_mve_vst4q: {
21029 // Conservatively set memVT to the entire set of vectors stored.
21030 Type *VecTy = I.getArgOperand(1)->getType();
21031 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21032 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21033 Info.ptrVal = I.getArgOperand(0);
21034 Info.offset = 0;
21035 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21036 // volatile stores with MVE intrinsics not supported
21038 return true;
21039 }
21040 case Intrinsic::arm_mve_vldr_gather_base:
21041 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21043 Info.ptrVal = nullptr;
21044 Info.memVT = MVT::getVT(I.getType());
21045 Info.align = Align(1);
21047 return true;
21048 }
21049 case Intrinsic::arm_mve_vldr_gather_base_wb:
21050 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21052 Info.ptrVal = nullptr;
21053 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21054 Info.align = Align(1);
21056 return true;
21057 }
21058 case Intrinsic::arm_mve_vldr_gather_offset:
21059 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21061 Info.ptrVal = nullptr;
21062 MVT DataVT = MVT::getVT(I.getType());
21063 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21064 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21065 DataVT.getVectorNumElements());
21066 Info.align = Align(1);
21068 return true;
21069 }
21070 case Intrinsic::arm_mve_vstr_scatter_base:
21071 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21073 Info.ptrVal = nullptr;
21074 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21075 Info.align = Align(1);
21077 return true;
21078 }
21079 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21080 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21082 Info.ptrVal = nullptr;
21083 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21084 Info.align = Align(1);
21086 return true;
21087 }
21088 case Intrinsic::arm_mve_vstr_scatter_offset:
21089 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21091 Info.ptrVal = nullptr;
21092 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21093 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21094 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21095 DataVT.getVectorNumElements());
21096 Info.align = Align(1);
21098 return true;
21099 }
21100 case Intrinsic::arm_ldaex:
21101 case Intrinsic::arm_ldrex: {
21102 auto &DL = I.getDataLayout();
21103 Type *ValTy = I.getParamElementType(0);
21105 Info.memVT = MVT::getVT(ValTy);
21106 Info.ptrVal = I.getArgOperand(0);
21107 Info.offset = 0;
21108 Info.align = DL.getABITypeAlign(ValTy);
21110 return true;
21111 }
21112 case Intrinsic::arm_stlex:
21113 case Intrinsic::arm_strex: {
21114 auto &DL = I.getDataLayout();
21115 Type *ValTy = I.getParamElementType(1);
21117 Info.memVT = MVT::getVT(ValTy);
21118 Info.ptrVal = I.getArgOperand(1);
21119 Info.offset = 0;
21120 Info.align = DL.getABITypeAlign(ValTy);
21122 return true;
21123 }
21124 case Intrinsic::arm_stlexd:
21125 case Intrinsic::arm_strexd:
21127 Info.memVT = MVT::i64;
21128 Info.ptrVal = I.getArgOperand(2);
21129 Info.offset = 0;
21130 Info.align = Align(8);
21132 return true;
21133
21134 case Intrinsic::arm_ldaexd:
21135 case Intrinsic::arm_ldrexd:
21137 Info.memVT = MVT::i64;
21138 Info.ptrVal = I.getArgOperand(0);
21139 Info.offset = 0;
21140 Info.align = Align(8);
21142 return true;
21143
21144 default:
21145 break;
21146 }
21147
21148 return false;
21149}
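// As a rough sketch of the conservative memVT above: for a call such as
//   %r = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
//            @llvm.arm.neon.vld4.v4i32.p0(ptr %p, i32 8)
// the four result vectors total 512 bits, so memVT becomes v8i64 and the
// resulting MachineMemOperand covers the whole 64-byte region read by VLD4.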
21150
21151/// Returns true if it is beneficial to convert a load of a constant
21152/// to just the constant itself.
21153 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21154 Type *Ty) const {
21155 assert(Ty->isIntegerTy());
21156
21157 unsigned Bits = Ty->getPrimitiveSizeInBits();
21158 if (Bits == 0 || Bits > 32)
21159 return false;
21160 return true;
21161}
21162
21163 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21164 unsigned Index) const {
21165 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21166 return false;
21167
21168 return (Index == 0 || Index == ResVT.getVectorNumElements());
21169}
21170
21171 Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21172 ARM_MB::MemBOpt Domain) const {
21173 // First, if the target has no DMB, see what fallback we can use.
21174 if (!Subtarget->hasDataBarrier()) {
21175 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21176 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21177 // here.
21178 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21179 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21180 Builder.getInt32(0), Builder.getInt32(7),
21181 Builder.getInt32(10), Builder.getInt32(5)};
21182 return Builder.CreateIntrinsic(Intrinsic::arm_mcr, {}, args);
21183 } else {
21184 // Instead of using barriers, atomic accesses on these subtargets use
21185 // libcalls.
21186 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21187 }
21188 } else {
21189 // Only a full system barrier exists in the M-class architectures.
21190 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21191 Constant *CDomain = Builder.getInt32(Domain);
21192 return Builder.CreateIntrinsic(Intrinsic::arm_dmb, {}, CDomain);
21193 }
21194}
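// In terms of emitted instructions this is roughly (a sketch; the exact choice
// depends on the subtarget):
//   v7/v8 A/R-class:   dmb ish      (or dmb ishst for the ISHST domain)
//   M-class:           dmb sy       (only the full-system barrier exists)
//   ARMv6, no DMB:     mcr p15, #0, rX, c7, c10, #5   (CP15 barrier operation)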
21195
21196// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21197 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21198 Instruction *Inst,
21199 AtomicOrdering Ord) const {
21200 switch (Ord) {
21201 case AtomicOrdering::NotAtomic:
21202 case AtomicOrdering::Unordered:
21203 llvm_unreachable("Invalid fence: unordered/non-atomic");
21204 case AtomicOrdering::Monotonic:
21205 case AtomicOrdering::Acquire:
21206 return nullptr; // Nothing to do
21207 case AtomicOrdering::SequentiallyConsistent:
21208 if (!Inst->hasAtomicStore())
21209 return nullptr; // Nothing to do
21210 [[fallthrough]];
21211 case AtomicOrdering::Release:
21212 case AtomicOrdering::AcquireRelease:
21213 if (Subtarget->preferISHSTBarriers())
21214 return makeDMB(Builder, ARM_MB::ISHST);
21215 // FIXME: add a comment with a link to documentation justifying this.
21216 else
21217 return makeDMB(Builder, ARM_MB::ISH);
21218 }
21219 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21220}
21221
21222 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21223 Instruction *Inst,
21224 AtomicOrdering Ord) const {
21225 switch (Ord) {
21226 case AtomicOrdering::NotAtomic:
21227 case AtomicOrdering::Unordered:
21228 llvm_unreachable("Invalid fence: unordered/not-atomic");
21229 case AtomicOrdering::Monotonic:
21230 case AtomicOrdering::Release:
21231 return nullptr; // Nothing to do
21232 case AtomicOrdering::Acquire:
21233 case AtomicOrdering::AcquireRelease:
21234 case AtomicOrdering::SequentiallyConsistent:
21235 return makeDMB(Builder, ARM_MB::ISH);
21236 }
21237 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21238}
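// Taken together, the two hooks above give the usual mapping from the page
// referenced before emitLeadingFence when fences are inserted around atomics
// (a sketch, assuming a DMB-capable core):
//   store atomic i32 %v, ptr %p seq_cst, align 4   ->  dmb ish ; str ; dmb ish
//   %x = load atomic i32, ptr %p acquire, align 4  ->  ldr ; dmb ish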
21239
21240 // Loads and stores less than 64 bits are already atomic; ones above that
21241// are doomed anyway, so defer to the default libcall and blame the OS when
21242// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21243// anything for those.
21246 bool has64BitAtomicStore;
21247 if (Subtarget->isMClass())
21248 has64BitAtomicStore = false;
21249 else if (Subtarget->isThumb())
21250 has64BitAtomicStore = Subtarget->hasV7Ops();
21251 else
21252 has64BitAtomicStore = Subtarget->hasV6Ops();
21253
21254 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21255 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21257}
21258
21259 // Loads and stores less than 64 bits are already atomic; ones above that
21260// are doomed anyway, so defer to the default libcall and blame the OS when
21261// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21262// anything for those.
21263// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21264// guarantee, see DDI0406C ARM architecture reference manual,
21265// sections A8.8.72-74 LDRD)
21268 bool has64BitAtomicLoad;
21269 if (Subtarget->isMClass())
21270 has64BitAtomicLoad = false;
21271 else if (Subtarget->isThumb())
21272 has64BitAtomicLoad = Subtarget->hasV7Ops();
21273 else
21274 has64BitAtomicLoad = Subtarget->hasV6Ops();
21275
21276 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21277 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21279}
21280
21281// For the real atomic operations, we have ldrex/strex up to 32 bits,
21282// and up to 64 bits on the non-M profiles
21285 if (AI->isFloatingPointOperation())
21287
21288 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21289 bool hasAtomicRMW;
21290 if (Subtarget->isMClass())
21291 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21292 else if (Subtarget->isThumb())
21293 hasAtomicRMW = Subtarget->hasV7Ops();
21294 else
21295 hasAtomicRMW = Subtarget->hasV6Ops();
21296 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21297 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21298 // implement atomicrmw without spilling. If the target address is also on
21299 // the stack and close enough to the spill slot, this can lead to a
21300 // situation where the monitor always gets cleared and the atomic operation
21301 // can never succeed. So at -O0 lower this operation to a CAS loop.
21302 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21305 }
21307}
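// When LLSC expansion is chosen, a 32-bit RMW such as
//   %old = atomicrmw add ptr %p, i32 1 monotonic
// is rewritten into a ldrex/strex retry loop, roughly (a sketch):
//   1:  ldrex r0, [r1]
//       add   r2, r0, #1
//       strex r3, r2, [r1]
//       cmp   r3, #0
//       bne   1b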
21308
21309// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21310// bits, and up to 64 bits on the non-M profiles.
21313 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21314 // implement cmpxchg without spilling. If the address being exchanged is also
21315 // on the stack and close enough to the spill slot, this can lead to a
21316 // situation where the monitor always gets cleared and the atomic operation
21317 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21318 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21319 bool HasAtomicCmpXchg;
21320 if (Subtarget->isMClass())
21321 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21322 else if (Subtarget->isThumb())
21323 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21324 else
21325 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21326 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21327 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21330}
21331
21333 const Instruction *I) const {
21334 return InsertFencesForAtomic;
21335}
21336
21338 // ROPI/RWPI are not supported currently.
21339 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21340}
21341
21343 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21345
21346 // MSVC CRT has a global variable holding security cookie.
21347 M.getOrInsertGlobal("__security_cookie",
21348 PointerType::getUnqual(M.getContext()));
21349
21350 // MSVC CRT has a function to validate security cookie.
21351 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21352 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21353 PointerType::getUnqual(M.getContext()));
21354 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21355 F->addParamAttr(0, Attribute::AttrKind::InReg);
21356}
21357
21359 // MSVC CRT has a global variable holding security cookie.
21360 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21361 return M.getGlobalVariable("__security_cookie");
21363}
21364
21366 // MSVC CRT has a function to validate security cookie.
21367 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21368 return M.getFunction("__security_check_cookie");
21370}
21371
21373 unsigned &Cost) const {
21374 // If we do not have NEON, vector types are not natively supported.
21375 if (!Subtarget->hasNEON())
21376 return false;
21377
21378 // Floating point values and vector values map to the same register file.
21379 // Therefore, although we could do a store + extract of a vector type, it is
21380 // better to leave the value as a float, since we have more freedom in the
21381 // addressing mode for those.
21382 if (VectorTy->isFPOrFPVectorTy())
21383 return false;
21384
21385 // If the index is unknown at compile time, this is very expensive to lower
21386 // and it is not possible to combine the store with the extract.
21387 if (!isa<ConstantInt>(Idx))
21388 return false;
21389
21390 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21391 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21392 // We can do a store + vector extract on any vector that fits perfectly in a D
21393 // or Q register.
21394 if (BitWidth == 64 || BitWidth == 128) {
21395 Cost = 0;
21396 return true;
21397 }
21398 return false;
21399}
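// For example (illustrative): with NEON, the pair
//   %e = extractelement <4 x i32> %v, i32 1
//   store i32 %e, ptr %p
// can be selected as a single-lane store (vst1.32 {d0[1]}, [r0]), so the
// extract is reported as free (Cost = 0).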
21400
21402 return Subtarget->hasV6T2Ops();
21403}
21404
21406 return Subtarget->hasV6T2Ops();
21407}
21408
21410 const Instruction &AndI) const {
21411 if (!Subtarget->hasV7Ops())
21412 return false;
21413
21414 // Sink the `and` instruction only if the mask would fit into a modified
21415 // immediate operand.
21416 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21417 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21418 return false;
21419 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21420 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21421 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21422}
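// For example (illustrative): on an ARMv7/Thumb-2 target, a pattern like
//   %m = and i32 %x, 255
//   %c = icmp eq i32 %m, 0
// keeps the 'and' next to the compare, because 255 is a valid modified
// immediate and the pair can become a single flag-setting TST instruction.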
21423
21426 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21427 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21430 ExpansionFactor);
21431}
21432
21434 Value *Addr,
21435 AtomicOrdering Ord) const {
21436 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21437 bool IsAcquire = isAcquireOrStronger(Ord);
21438
21439 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21440 // intrinsic must return {i32, i32} and we have to recombine them into a
21441 // single i64 here.
21442 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21444 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21445
21446 Value *LoHi =
21447 Builder.CreateIntrinsic(Int, {}, Addr, /*FMFSource=*/nullptr, "lohi");
21448
21449 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21450 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21451 if (!Subtarget->isLittle())
21452 std::swap (Lo, Hi);
21453 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21454 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21455 return Builder.CreateOr(
21456 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21457 }
21458
21459 Type *Tys[] = { Addr->getType() };
21460 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21461 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
21462
21463 CI->addParamAttr(
21464 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21465 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21466}
21467
21469 IRBuilderBase &Builder) const {
21470 if (!Subtarget->hasV7Ops())
21471 return;
21472 Builder.CreateIntrinsic(Intrinsic::arm_clrex, {}, {});
21473}
21474
21476 Value *Val, Value *Addr,
21477 AtomicOrdering Ord) const {
21478 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21479 bool IsRelease = isReleaseOrStronger(Ord);
21480
21481 // Since the intrinsics must have legal type, the i64 intrinsics take two
21482 // parameters: "i32, i32". We must marshal Val into the appropriate form
21483 // before the call.
21484 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21486 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21487 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21488
21489 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21490 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21491 if (!Subtarget->isLittle())
21492 std::swap(Lo, Hi);
21493 return Builder.CreateIntrinsic(Int, {}, {Lo, Hi, Addr});
21494 }
21495
21496 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21497 Type *Tys[] = { Addr->getType() };
21499
21500 CallInst *CI = Builder.CreateCall(
21501 Strex, {Builder.CreateZExtOrBitCast(
21502 Val, Strex->getFunctionType()->getParamType(0)),
21503 Addr});
21504 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21505 Val->getType()));
21506 return CI;
21507}
21508
21509
21511 return Subtarget->isMClass();
21512}
21513
21514/// A helper function for determining the number of interleaved accesses we
21515/// will generate when lowering accesses of the given type.
21516unsigned
21518 const DataLayout &DL) const {
21519 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21520}
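// For example: a <16 x i32> group is 512 bits, so (512 + 127) / 128 = 4
// separate 128-bit accesses are generated, while a 64-bit <2 x i32> group
// rounds up to a single access.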
21521
21523 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21524 const DataLayout &DL) const {
21525
21526 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21527 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21528
21529 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21530 return false;
21531
21532 // Ensure the vector doesn't have f16 elements. Even though we could do an
21533 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21534 // f32.
21535 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21536 return false;
21537 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21538 return false;
21539
21540 // Ensure the number of vector elements is greater than 1.
21541 if (VecTy->getNumElements() < 2)
21542 return false;
21543
21544 // Ensure the element type is legal.
21545 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21546 return false;
21548 // And that the alignment is high enough under MVE.
21548 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21549 return false;
21550
21551 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21552 // 128 will be split into multiple interleaved accesses.
21553 if (Subtarget->hasNEON() && VecSize == 64)
21554 return true;
21555 return VecSize % 128 == 0;
21556}
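// A few examples of the rules above (illustrative):
//   Factor = 2, <8 x i16>, align 2   -> legal (128 bits, 16-bit elements)
//   Factor = 3, <4 x i32>            -> legal with NEON only; rejected for MVE
//   Factor = 2, <8 x half>           -> rejected for NEON (f16), fine for MVE
//   Factor = 2, <2 x i64>            -> rejected (64-bit elements unsupported)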
21557
21559 if (Subtarget->hasNEON())
21560 return 4;
21561 if (Subtarget->hasMVEIntegerOps())
21564}
21565
21566/// Lower an interleaved load into a vldN intrinsic.
21567///
21568/// E.g. Lower an interleaved load (Factor = 2):
21569/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21570/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21571/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21572///
21573/// Into:
21574/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21575/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21576/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21579 ArrayRef<unsigned> Indices, unsigned Factor) const {
21580 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21581 "Invalid interleave factor");
21582 assert(!Shuffles.empty() && "Empty shufflevector input");
21583 assert(Shuffles.size() == Indices.size() &&
21584 "Unmatched number of shufflevectors and indices");
21585
21586 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21587 Type *EltTy = VecTy->getElementType();
21588
21589 const DataLayout &DL = LI->getDataLayout();
21590 Align Alignment = LI->getAlign();
21591
21592 // Skip if we do not have NEON and skip illegal vector types. We can
21593 // "legalize" wide vector types into multiple interleaved accesses as long as
21594 // the vector types are divisible by 128.
21595 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21596 return false;
21597
21598 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21599
21601 // A pointer vector cannot be the return type of the ldN intrinsics. Need to
21601 // load integer vectors first and then convert to pointer vectors.
21602 if (EltTy->isPointerTy())
21603 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21604
21605 IRBuilder<> Builder(LI);
21606
21607 // The base address of the load.
21608 Value *BaseAddr = LI->getPointerOperand();
21609
21610 if (NumLoads > 1) {
21611 // If we're going to generate more than one load, reset the sub-vector type
21612 // to something legal.
21613 VecTy = FixedVectorType::get(VecTy->getElementType(),
21614 VecTy->getNumElements() / NumLoads);
21615 }
21616
21617 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21618
21619 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21620 if (Subtarget->hasNEON()) {
21621 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21622 Type *Tys[] = {VecTy, PtrTy};
21623 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21624 Intrinsic::arm_neon_vld3,
21625 Intrinsic::arm_neon_vld4};
21626
21628 Ops.push_back(BaseAddr);
21629 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21630
21631 return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,
21632 /*FMFSource=*/nullptr, "vldN");
21633 } else {
21634 assert((Factor == 2 || Factor == 4) &&
21635 "expected interleave factor of 2 or 4 for MVE");
21636 Intrinsic::ID LoadInts =
21637 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21638 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21639 Type *Tys[] = {VecTy, PtrTy};
21640
21642 Ops.push_back(BaseAddr);
21643 return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,
21644 "vldN");
21645 }
21646 };
21647
21648 // Holds sub-vectors extracted from the load intrinsic return values. The
21649 // sub-vectors are associated with the shufflevector instructions they will
21650 // replace.
21652 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21653 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21654 // If we're generating more than one load, compute the base address of
21655 // subsequent loads as an offset from the previous.
21656 if (LoadCount > 0)
21657 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21658 VecTy->getNumElements() * Factor);
21659
21660 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21661
21662 // Replace uses of each shufflevector with the corresponding vector loaded
21663 // by ldN.
21664 for (unsigned i = 0; i < Shuffles.size(); i++) {
21665 ShuffleVectorInst *SV = Shuffles[i];
21666 unsigned Index = Indices[i];
21667
21668 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21669
21670 // Convert the integer vector to pointer vector if the element is pointer.
21671 if (EltTy->isPointerTy())
21672 SubVec = Builder.CreateIntToPtr(
21673 SubVec,
21675
21676 SubVecs[SV].push_back(SubVec);
21677 }
21678 }
21679
21680 // Replace uses of the shufflevector instructions with the sub-vectors
21681 // returned by the load intrinsic. If a shufflevector instruction is
21682 // associated with more than one sub-vector, those sub-vectors will be
21683 // concatenated into a single wide vector.
21684 for (ShuffleVectorInst *SVI : Shuffles) {
21685 auto &SubVec = SubVecs[SVI];
21686 auto *WideVec =
21687 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21688 SVI->replaceAllUsesWith(WideVec);
21689 }
21690
21691 return true;
21692}
21693
21694/// Lower an interleaved store into a vstN intrinsic.
21695///
21696/// E.g. Lower an interleaved store (Factor = 3):
21697/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21698/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21699/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21700///
21701/// Into:
21702/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21703/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21704/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21705/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21706///
21707/// Note that the new shufflevectors will be removed and we'll only generate one
21708/// vst3 instruction in CodeGen.
21709///
21710/// Example for a more general valid mask (Factor 3). Lower:
21711/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21712/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21713/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21714///
21715/// Into:
21716/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21717/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21718/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21719/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21721 ShuffleVectorInst *SVI,
21722 unsigned Factor) const {
21723 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21724 "Invalid interleave factor");
21725
21726 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21727 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21728
21729 unsigned LaneLen = VecTy->getNumElements() / Factor;
21730 Type *EltTy = VecTy->getElementType();
21731 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21732
21733 const DataLayout &DL = SI->getDataLayout();
21734 Align Alignment = SI->getAlign();
21735
21736 // Skip if we do not have NEON and skip illegal vector types. We can
21737 // "legalize" wide vector types into multiple interleaved accesses as long as
21738 // the vector types are divisible by 128.
21739 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21740 return false;
21741
21742 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21743
21744 Value *Op0 = SVI->getOperand(0);
21745 Value *Op1 = SVI->getOperand(1);
21746 IRBuilder<> Builder(SI);
21747
21748 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21749 // vectors to integer vectors.
21750 if (EltTy->isPointerTy()) {
21751 Type *IntTy = DL.getIntPtrType(EltTy);
21752
21753 // Convert to the corresponding integer vector.
21754 auto *IntVecTy =
21755 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21756 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21757 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21758
21759 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21760 }
21761
21762 // The base address of the store.
21763 Value *BaseAddr = SI->getPointerOperand();
21764
21765 if (NumStores > 1) {
21766 // If we're going to generate more than one store, reset the lane length
21767 // and sub-vector type to something legal.
21768 LaneLen /= NumStores;
21769 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21770 }
21771
21772 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21773
21774 auto Mask = SVI->getShuffleMask();
21775
21776 auto createStoreIntrinsic = [&](Value *BaseAddr,
21777 SmallVectorImpl<Value *> &Shuffles) {
21778 if (Subtarget->hasNEON()) {
21779 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21780 Intrinsic::arm_neon_vst3,
21781 Intrinsic::arm_neon_vst4};
21782 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21783 Type *Tys[] = {PtrTy, SubVecTy};
21784
21786 Ops.push_back(BaseAddr);
21787 append_range(Ops, Shuffles);
21788 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21789 Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);
21790 } else {
21791 assert((Factor == 2 || Factor == 4) &&
21792 "expected interleave factor of 2 or 4 for MVE");
21793 Intrinsic::ID StoreInts =
21794 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21795 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21796 Type *Tys[] = {PtrTy, SubVecTy};
21797
21799 Ops.push_back(BaseAddr);
21800 append_range(Ops, Shuffles);
21801 for (unsigned F = 0; F < Factor; F++) {
21802 Ops.push_back(Builder.getInt32(F));
21803 Builder.CreateIntrinsic(StoreInts, Tys, Ops);
21804 Ops.pop_back();
21805 }
21806 }
21807 };
21808
21809 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21810 // If we're generating more than one store, we compute the base address of
21811 // subsequent stores as an offset from the previous.
21812 if (StoreCount > 0)
21813 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21814 BaseAddr, LaneLen * Factor);
21815
21816 SmallVector<Value *, 4> Shuffles;
21817
21818 // Split the shufflevector operands into sub vectors for the new vstN call.
21819 for (unsigned i = 0; i < Factor; i++) {
21820 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21821 if (Mask[IdxI] >= 0) {
21822 Shuffles.push_back(Builder.CreateShuffleVector(
21823 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21824 } else {
21825 unsigned StartMask = 0;
21826 for (unsigned j = 1; j < LaneLen; j++) {
21827 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21828 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21829 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21830 break;
21831 }
21832 }
21833 // Note: If all elements in a chunk are undefs, StartMask=0!
21834 // Note: Filling undef gaps with random elements is ok, since
21835 // those elements were being written anyway (with undefs).
21836 // In the case of all undefs we're defaulting to using elems from 0
21837 // Note: StartMask cannot be negative, it's checked in
21838 // isReInterleaveMask
21839 Shuffles.push_back(Builder.CreateShuffleVector(
21840 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21841 }
21842 }
21843
21844 createStoreIntrinsic(BaseAddr, Shuffles);
21845 }
21846 return true;
21847}
21848
21856
21858 uint64_t &Members) {
21859 if (auto *ST = dyn_cast<StructType>(Ty)) {
21860 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21861 uint64_t SubMembers = 0;
21862 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21863 return false;
21864 Members += SubMembers;
21865 }
21866 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21867 uint64_t SubMembers = 0;
21868 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21869 return false;
21870 Members += SubMembers * AT->getNumElements();
21871 } else if (Ty->isFloatTy()) {
21872 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21873 return false;
21874 Members = 1;
21875 Base = HA_FLOAT;
21876 } else if (Ty->isDoubleTy()) {
21877 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21878 return false;
21879 Members = 1;
21880 Base = HA_DOUBLE;
21881 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21882 Members = 1;
21883 switch (Base) {
21884 case HA_FLOAT:
21885 case HA_DOUBLE:
21886 return false;
21887 case HA_VECT64:
21888 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21889 case HA_VECT128:
21890 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21891 case HA_UNKNOWN:
21892 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21893 case 64:
21894 Base = HA_VECT64;
21895 return true;
21896 case 128:
21897 Base = HA_VECT128;
21898 return true;
21899 default:
21900 return false;
21901 }
21902 }
21903 }
21904
21905 return (Members > 0 && Members <= 4);
21906}
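// A few examples of what qualifies (illustrative):
//   { float, float, float }        -> HA, Base = HA_FLOAT,   Members = 3
//   { <4 x float>, <4 x float> }   -> HA, Base = HA_VECT128, Members = 2
//   { float, double }              -> not an HA (mixed base types)
//   [5 x float]                    -> not an HA (more than 4 members)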
21907
21908/// Return the correct alignment for the current calling convention.
21910 Type *ArgTy, const DataLayout &DL) const {
21911 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21912 if (!ArgTy->isVectorTy())
21913 return ABITypeAlign;
21914
21915 // Avoid over-aligning vector parameters. It would require realigning the
21916 // stack and waste space for no real benefit.
21917 MaybeAlign StackAlign = DL.getStackAlignment();
21918 assert(StackAlign && "data layout string is missing stack alignment");
21919 return std::min(ABITypeAlign, *StackAlign);
21920}
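// For example (a sketch, assuming the usual AAPCS data layout with an 8-byte
// stack alignment): a <4 x i32> argument has a 16-byte ABI type alignment but
// is passed with only 8-byte alignment here, while scalar arguments keep their
// natural ABI alignment.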
21921
21922/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21923/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21924/// passing according to AAPCS rules.
21926 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21927 const DataLayout &DL) const {
21928 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21930 return false;
21931
21933 uint64_t Members = 0;
21934 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21935 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21936
21937 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21938 return IsHA || IsIntArray;
21939}
21940
21942 const Constant *PersonalityFn) const {
21943 // Platforms which do not use SjLj EH may return values in these registers
21944 // via the personality function.
21945 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
21946}
21947
21949 const Constant *PersonalityFn) const {
21950 // Platforms which do not use SjLj EH may return values in these registers
21951 // via the personality function.
21952 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
21953}
21954
21955void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21956 // Update IsSplitCSR in ARMFunctionInfo.
21957 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21958 AFI->setIsSplitCSR(true);
21959}
21960
21961void ARMTargetLowering::insertCopiesSplitCSR(
21962 MachineBasicBlock *Entry,
21963 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21964 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21965 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21966 if (!IStart)
21967 return;
21968
21969 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21970 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21971 MachineBasicBlock::iterator MBBI = Entry->begin();
21972 for (const MCPhysReg *I = IStart; *I; ++I) {
21973 const TargetRegisterClass *RC = nullptr;
21974 if (ARM::GPRRegClass.contains(*I))
21975 RC = &ARM::GPRRegClass;
21976 else if (ARM::DPRRegClass.contains(*I))
21977 RC = &ARM::DPRRegClass;
21978 else
21979 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
21980
21981 Register NewVR = MRI->createVirtualRegister(RC);
21982 // Create copy from CSR to a virtual register.
21983 // FIXME: this currently does not emit CFI pseudo-instructions, it works
21984 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
21985 // nounwind. If we want to generalize this later, we may need to emit
21986 // CFI pseudo-instructions.
21987 assert(Entry->getParent()->getFunction().hasFnAttribute(
21988 Attribute::NoUnwind) &&
21989 "Function should be nounwind in insertCopiesSplitCSR!");
21990 Entry->addLiveIn(*I);
21991 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
21992 .addReg(*I);
21993
21994 // Insert the copy-back instructions right before the terminator.
21995 for (auto *Exit : Exits)
21996 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
21997 TII->get(TargetOpcode::COPY), *I)
21998 .addReg(NewVR);
21999 }
22000}
22001
22005}
22006
22008 return Subtarget->hasMVEIntegerOps();
22009}
22010
22013 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22014 if (!VTy)
22015 return false;
22016
22017 auto *ScalarTy = VTy->getScalarType();
22018 unsigned NumElements = VTy->getNumElements();
22019
22020 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22021 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22022 return false;
22023
22024 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22025 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22026 return Subtarget->hasMVEFloatOps();
22027
22029 return false;
22030
22031 return Subtarget->hasMVEIntegerOps() &&
22032 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22033 ScalarTy->isIntegerTy(32));
22034}
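// For example (illustrative): <8 x half> and <4 x float> (128 bits each) are
// supported when MVE floating point is available, <16 x i8>, <8 x i16> and
// <4 x i32> require MVE integer ops, and anything narrower than 128 bits or
// with a non-power-of-two total width is rejected.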
22035
22038 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22039 Value *Accumulator) const {
22040
22041 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22042
22043 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22044
22045 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22046
22047 if (TyWidth > 128) {
22048 int Stride = Ty->getNumElements() / 2;
22049 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22050 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22051 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22052 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22053
22054 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22055 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22056 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22057 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22058 Value *LowerSplitAcc = nullptr;
22059 Value *UpperSplitAcc = nullptr;
22060
22061 if (Accumulator) {
22062 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22063 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22064 }
22065
22066 auto *LowerSplitInt = createComplexDeinterleavingIR(
22067 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22068 auto *UpperSplitInt = createComplexDeinterleavingIR(
22069 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22070
22071 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22072 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22073 }
22074
22075 auto *IntTy = Type::getInt32Ty(B.getContext());
22076
22077 ConstantInt *ConstRotation = nullptr;
22078 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22079 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22080
22081 if (Accumulator)
22082 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22083 {ConstRotation, Accumulator, InputB, InputA});
22084 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22085 {ConstRotation, InputB, InputA});
22086 }
22087
22088 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22089 // 1 means the value is not halved.
22090 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22091
22093 ConstRotation = ConstantInt::get(IntTy, 0);
22095 ConstRotation = ConstantInt::get(IntTy, 1);
22096
22097 if (!ConstRotation)
22098 return nullptr; // Invalid rotation for arm_mve_vcaddq
22099
22100 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22101 {ConstHalving, ConstRotation, InputA, InputB});
22102 }
22103
22104 return nullptr;
22105}
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
#define MAKE_CASE(V)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
unsigned RegSize
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
static const LLT S1
static const LLT F64
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations...
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, SDValue V1)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V)
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
constexpr MVT FlagsVT
Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with the given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific DAG combine transforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine - Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
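The transformation named above is plain distribution. Below is a minimal standalone sketch (illustrative names, not the combine itself) of the two equivalent forms; unsigned arithmetic is used so wrap-around matches the DAG's integer semantics. The distributed form ends in a multiply followed by a multiply-accumulate, which is the shape this combine is after.
#include <cstdint>
// Standalone sketch only: both functions compute the same value.
static uint32_t beforeCombine(uint32_t A, uint32_t B, uint32_t C) {
  return (A + B) * C;   // add, then multiply
}
static uint32_t afterCombine(uint32_t A, uint32_t B, uint32_t C) {
  return A * C + B * C; // multiply, then multiply-accumulate (e.g. MLA/VMLA)
}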
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific DAG combine transforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with the given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
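As context for the splat check: for 32-bit elements, one family of NEON modified immediates accepts any splat whose 32-bit value has at most one non-zero byte. The following is a standalone sketch of just that subset (illustrative name; the real helper covers more element sizes and immediate forms, and returns the encoded immediate as an SDValue rather than a bool).
#include <cstdint>
// Standalone sketch only: true if all but one byte of the splat value are zero.
static bool isSingleByteSplatImm(uint32_t SplatBits) {
  for (unsigned Shift = 0; Shift < 32; Shift += 8)
    if ((SplatBits & ~(0xFFu << Shift)) == 0)
      return true;
  return false;
}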
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
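The mask shapes these helpers recognize can be shown outside the DAG. Below is a minimal standalone sketch, with illustrative names, of what a VTRN, VUZP, or VZIP mask looks like for a two-source shuffle; it is not the LLVM code, which also handles the single-source (v_undef) variants and returns the matching ARMISD opcode.
#include <vector>
// Standalone sketch only: recognize the two-result NEON shuffle shapes on a
// mask over concat(V1, V2), where each source has NumElts elements and a
// negative mask entry stands for an undef lane.  WhichResult selects the
// first (0) or second (1) output of the instruction pair.
static bool matchesVTRN(const std::vector<int> &M, unsigned NumElts,
                        unsigned WhichResult) {
  // VTRN result: <W, W+N, W+2, W+N+2, ...> (transpose of element pairs).
  for (unsigned i = 0; i < NumElts; i += 2)
    if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
      return false;
  return true;
}
static bool matchesVUZP(const std::vector<int> &M, unsigned NumElts,
                        unsigned WhichResult) {
  // VUZP result: every second element of the concatenation, <W, W+2, W+4, ...>.
  for (unsigned i = 0; i < NumElts; ++i)
    if (M[i] >= 0 && (unsigned)M[i] != 2 * i + WhichResult)
      return false;
  return true;
}
static bool matchesVZIP(const std::vector<int> &M, unsigned NumElts,
                        unsigned WhichResult) {
  // VZIP result: interleave corresponding halves, <W, W+N, W+1, W+N+1, ...>.
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i < NumElts; i += 2, ++Idx)
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
      return false;
  return true;
}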
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific DAG combine transforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
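The legal offset range depends on both the access type and the instruction set in use. As a rough standalone illustration (illustrative name; classic A32 cases only, the real helper also handles Thumb1/Thumb2 and subtarget specifics), the A32 limits look like this:
#include <cstdint>
#include <cstdlib>
// Standalone sketch only: approximate A32 immediate-offset limits.
static bool isLegalA32OffsetSketch(int64_t V, unsigned AccessBits,
                                   bool IsFloat) {
  int64_t Abs = std::llabs(V);
  if (IsFloat)                   // VLDR/VSTR: 8-bit immediate, scaled by 4.
    return (Abs & 3) == 0 && Abs <= 1020;
  switch (AccessBits) {
  case 8:                        // LDRB/STRB: 12-bit immediate.
  case 32:                       // LDR/STR:   12-bit immediate.
    return Abs <= 4095;
  case 16:                       // LDRH/STRH: 8-bit immediate.
    return Abs <= 255;
  default:
    return false;
  }
}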
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific DAG combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
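A dedicated check is needed because +0.0 and -0.0 compare equal, so an ordinary floating-point comparison cannot distinguish them; only the sign bit differs. A standalone illustration (not the DAG-level helper):
#include <cmath>
#include <cstdio>
int main() {
  double PosZero = 0.0, NegZero = -0.0;
  // The two zeros compare equal, so operator== cannot tell them apart...
  std::printf("equal: %d\n", PosZero == NegZero);            // prints 1
  // ...but the sign bit differs, which is what a +0.0-only check cares about.
  std::printf("signbit(+0.0)=%d signbit(-0.0)=%d\n",
              std::signbit(PosZero), std::signbit(NegZero)); // prints 0 and 1
  return 0;
}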
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific DAG combine transforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
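The pattern recognized here is the scalar "clamp then truncate" idiom, which maps onto saturating instructions such as SSAT or VQMOVN. A minimal standalone sketch of the signed 8-bit case (illustrative name, not the DAG code):
#include <algorithm>
#include <cstdint>
// Standalone sketch only: clamp a wider value to the i8 range, then truncate.
static int8_t saturateToI8(int32_t X) {
  int32_t Clamped = std::min<int32_t>(std::max<int32_t>(X, -128), 127);
  return static_cast<int8_t>(Clamped); // truncation is exact after clamping
}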
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1479
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the bit at the position given as "bitPosition" to 1.
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
unsigned logBase2() const
Definition: APInt.h:1739
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:349
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:358
bool hasARMOps() const
Definition: ARMSubtarget.h:302
bool supportsTailCall() const
Definition: ARMSubtarget.h:427
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:335
bool hasVFP4Base() const
Definition: ARMSubtarget.h:310
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:238
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
bool useFPVFMx() const
Definition: ARMSubtarget.h:319
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:311
bool isThumb2() const
Definition: ARMSubtarget.h:404
bool isTargetWindows() const
Definition: ARMSubtarget.h:345
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:325
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:242
bool useSjLjEH() const
Definition: ARMSubtarget.h:324
bool isTargetDarwin() const
Definition: ARMSubtarget.h:337
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:250
bool hasVFP2Base() const
Definition: ARMSubtarget.h:308
bool isTargetAndroid() const
Definition: ARMSubtarget.h:389
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:347
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:363
bool hasVFP3Base() const
Definition: ARMSubtarget.h:309
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:323
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:339
unsigned getPreferBranchLogAlignment() const
Definition: ARMSubtarget.h:514
bool hasMinSize() const
Definition: ARMSubtarget.h:402
bool isTargetIOS() const
Definition: ARMSubtarget.h:338
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:304
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:461
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:340
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:313
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:341
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:435
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:429
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:370
bool isTargetLinux() const
Definition: ARMSubtarget.h:342
bool useFPVFMx16() const
Definition: ARMSubtarget.h:322
bool isMClass() const
Definition: ARMSubtarget.h:405
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:317
bool isTargetELF() const
Definition: ARMSubtarget.h:348
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:471
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode represented by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
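A standalone illustration (illustrative names) of why the two forms are interchangeable under two's-complement arithmetic: since ~X == -X - 1, Y - ~X == Y + X + 1 == (X + 1) + Y.
#include <cstdint>
// Standalone sketch only: both return the same value for every X and Y.
static uint32_t subOfNot(uint32_t X, uint32_t Y) { return Y - (X ^ 0xFFFFFFFFu); }
static uint32_t incOfAdd(uint32_t X, uint32_t Y) { return (X + 1) + Y; }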
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
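For A32, a compare immediate is typically legal when it fits the classic "modified immediate" encoding: an 8-bit value rotated right by an even amount (and in practice comparisons can often use the negated value via CMN). Below is a minimal standalone sketch of that encodability test (illustrative name; the real hook is broader and also covers the Thumb encodings):
#include <cstdint>
// Standalone sketch only: rotating the candidate left by the same even amount
// and checking that the result fits in 8 bits is an equivalent test.
static bool isA32ModifiedImmSketch(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Rotated = Rot == 0 ? Imm : ((Imm << Rot) | (Imm >> (32 - Rot)));
    if (Rotated <= 0xFF)
      return true;
  }
  return false;
}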
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
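The source pattern behind this hook is a strided, de-interleaving access. A standalone sketch (illustrative names, factor 2) of the scalar loop whose vectorized form, a wide load plus de-interleaving shufflevectors, this hook can turn into a single vld2:
#include <cstddef>
// Standalone sketch only: a factor-2 interleaved access pattern.  The even
// and odd elements of each pair end up in separate output arrays, which is
// exactly what a NEON vld2 produces in two registers at once.
static void deinterleavePairs(const float *Interleaved, float *Even,
                              float *Odd, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I) {
    Even[I] = Interleaved[2 * I];    // element 0 of each pair
    Odd[I]  = Interleaved[2 * I + 1]; // element 1 of each pair
  }
}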
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
An instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
bool isFloatingPointOperation() const
Definition: Instructions.h:882
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:95
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
The address of a basic block.
Definition: Constants.h:893
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string, along with methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition: DataLayout.h:227
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:988
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:285
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:563
unsigned getNumElements() const
Definition: DerivedTypes.h:606
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:277
arg_iterator arg_begin()
Definition: Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:688
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2165
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1902
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2150
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2525
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2145
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2181
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
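These MachineInstrBuilder helpers are normally chained off BuildMI (documented further below). A hedged sketch of the usual pattern; the opcode description, registers, and immediate are placeholders:
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
// Emit "DestReg = <Desc> SrcReg, #Imm" at MI's position, cloning MI's memory
// operands so alias information is preserved on the new instruction.
static void emitExample(MachineBasicBlock &MBB, MachineInstr &MI,
                        const MCInstrDesc &Desc, Register DestReg,
                        Register SrcReg, int64_t Imm) {
  BuildMI(MBB, MI, MI.getDebugLoc(), Desc, DestReg)
      .addReg(SrcReg)
      .addImm(Imm)
      .cloneMemRefs(MI);
}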
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
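A sketch of how these flags combine with MachineFunction::getMachineMemOperand when describing a 32-bit load; the fixed-stack pointer info and LLT are illustrative assumptions:
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;
// Describe a simple 32-bit load from fixed stack slot FI.
static MachineMemOperand *describeLoad(MachineFunction &MF, int FI) {
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  return MF.getMachineMemOperand(PtrInfo,
                                 MachineMemOperand::MOLoad |
                                     MachineMemOperand::MODereferenceable,
                                 LLT::scalar(32), Align(4));
}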
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns the original (base) alignment of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the type of the node is undefined.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
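A small sketch of the SDNode/SDValue accessors above, in the style of a DAG-combine check that looks for an add of a constant; the pattern itself is illustrative, not a combine performed by this file:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Return the non-constant operand of (add x, c) when the value has a single
// use, or an empty SDValue otherwise.
static SDValue matchAddOfConstant(SDValue V) {
  if (V.getOpcode() != ISD::ADD || !V.hasOneUse())
    return SDValue();
  if (isa<ConstantSDNode>(V.getOperand(1)))
    return V.getOperand(0);
  return SDValue();
}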
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:497
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
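A hedged sketch of the SelectionDAG builder calls above, producing (x != 0) ? a : b during custom lowering; the operands and the helper name are placeholders:
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
// Build "select (setcc X, 0, ne), A, B" with getConstant/getSetCC/getSelect.
static SDValue buildSelectOnNonZero(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue X, SDValue A, SDValue B) {
  EVT VT = X.getValueType();
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Cond = DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETNE);
  return DAG.getSelect(DL, A.getValueType(), Cond, A, B);
}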
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
void reserve(size_type NumEntries)
Definition: SmallPtrSet.h:112
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallSet.h:168
bool erase(const T &V)
Definition: SmallSet.h:193
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
const unsigned char * bytes_end() const
Definition: StringRef.h:131
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
const unsigned char * bytes_begin() const
Definition: StringRef.h:128
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
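StringSwitch is the usual way to classify small string queries such as inline-asm constraint names. A minimal, self-contained sketch with illustrative case values:
#include "llvm/ADT/StringSwitch.h"
// Map a textual name to a small integer tag, defaulting to -1.
static int classify(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("low", 0)
      .Case("high", 1)
      .Default(-1);
}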
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
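These TargetLoweringBase hooks are called from a target's TargetLowering constructor to describe what the hardware supports. A hedged sketch of the common pattern for a hypothetical target; the chosen types, actions, and alignments are illustrative and are not ARM's actual configuration:
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
// Hypothetical target: the constructor declares what is legal and lets
// computeRegisterProperties derive the rest.
class ExampleTargetLowering : public TargetLowering {
public:
  ExampleTargetLowering(const TargetMachine &TM,
                        const TargetRegisterClass *GPRClass,
                        const TargetRegisterInfo *TRI)
      : TargetLowering(TM) {
    addRegisterClass(MVT::i32, GPRClass);              // i32 lives in GPRs.
    setOperationAction(ISD::SDIV, MVT::i32, Expand);   // no native sdiv.
    setOperationAction(ISD::CTPOP, MVT::i32, Custom);  // custom-lower ctpop.
    setSchedulingPreference(Sched::RegPressure);
    setMinFunctionAlignment(Align(2));
    computeRegisterProperties(TRI);                    // derive properties.
  }
};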
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:409
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:510
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:645
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
Type * getArrayElementType() const
Definition: Type.h:411
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
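The ARM immediate helpers above answer "can this constant be encoded directly?" questions during lowering. A small sketch, assuming the in-tree include path used by the ARM backend and the convention that a negative return value means the immediate is not encodable:
#include "MCTargetDesc/ARMAddressingModes.h"
using namespace llvm;
// Return true if Imm fits in a single data-processing instruction, either as
// an ARM shifter_operand or as a Thumb-2 modified immediate.
static bool isSingleInstructionImm(unsigned Imm, bool IsThumb2) {
  if (IsThumb2)
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  return ARM_AM::getSOImmVal(Imm) != -1;
}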
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1069
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:512
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1073
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1092
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:157
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1096
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:788
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1087
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1064
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:112
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1635
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1551
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1602
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1582
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1553
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
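The ISD::is*Load predicates above are the usual guards before an indexed-load style transformation. A hedged sketch of that kind of check; the combine it would feed is illustrative:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Only simple, unindexed loads (possibly sign/zero-extending) with a single
// use are considered for folding in this kind of combine.
static bool isCandidateLoad(const SDNode *N) {
  if (!ISD::isNormalLoad(N) && !ISD::isSEXTLoad(N) && !ISD::isZEXTLoad(N))
    return false;
  const auto *LD = cast<LoadSDNode>(N);
  return LD->getAddressingMode() == ISD::UNINDEXED && LD->isSimple() &&
         LD->hasOneUse();
}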
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
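These RTLIB getters pair naturally with TargetLowering::makeLibCall (documented above) when a floating-point operation has to be softened. A hedged sketch with error handling and call options reduced to the minimum; the helper name is an illustrative assumption:
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <cassert>
using namespace llvm;
// Lower an fp-to-signed-int conversion through the runtime library when no
// native instruction exists for the given type combination.
static SDValue softenFPToSInt(const TargetLowering &TLI, SelectionDAG &DAG,
                              const SDLoc &DL, SDValue Src, EVT RetVT) {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(Src.getValueType(), RetVT);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported type combination");
  TargetLowering::MakeLibCallOptions CallOptions;
  return TLI.makeLibCall(DAG, LC, RetVT, Src, CallOptions, DL).first;
}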
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
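A short hedged example of the range-based all_of wrapper (allNonNegative is a hypothetical helper used only for illustration):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// Hypothetical helper: true when every element is >= 0 (and true for an empty range).
static bool allNonNegative(llvm::ArrayRef<int> Vals) {
  return llvm::all_of(Vals, [](int V) { return V >= 0; });
}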
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
@ Read
Definition: CodeGenData.h:107
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
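A minimal sketch of the dynamic bit-width range checks; it also uses the signed counterpart isIntN, whose entry appears further below (checkBitWidths is a hypothetical function name):

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Hypothetical sanity checks for dynamic bit-width range tests.
static void checkBitWidths() {
  assert(llvm::isUIntN(8, 255) && !llvm::isUIntN(8, 256)); // unsigned 8-bit range is [0, 255]
  assert(llvm::isIntN(8, -128) && !llvm::isIntN(8, 128));  // signed 8-bit range is [-128, 127]
}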
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
Definition: MathExtras.h:267
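A hedged sketch relating isMask_32 to the countr_one helper listed above (maskChecks is a hypothetical function name):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

// Hypothetical checks: a "mask" here means a run of ones starting at bit 0.
static void maskChecks() {
  assert(llvm::isMask_32(0x0000FFFFu));        // 16 contiguous ones from the LSB
  assert(!llvm::isMask_32(0x0000FFFEu));       // bit 0 is clear, so not a mask
  assert(llvm::countr_one(0x0000FFFFu) == 16); // width of the low run of ones
}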
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1558
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
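A brief hedged sketch connecting Log2_32 with the trailing-zero count documented above (log2Checks is a hypothetical function name):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

// Hypothetical checks relating Log2_32 to the bit-counting helpers.
static void log2Checks() {
  assert(llvm::Log2_32(32) == 5);      // exact for powers of two
  assert(llvm::Log2_32(33) == 5);      // floor, not rounded up
  assert(llvm::countr_zero(32u) == 5); // for powers of two this equals Log2_32
}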
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with a copy of each element of the range.
Definition: SmallVector.h:1299
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register, or 3 if a literal pool load is needed.
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
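A minimal hedged sketch of rounding a size up with alignTo and the Align type documented further below (alignChecks is a hypothetical function name):

#include "llvm/Support/Alignment.h"
#include <cassert>

// Hypothetical check: round a byte size up to an 8-byte boundary.
static void alignChecks() {
  llvm::Align A(8);                   // alignment must be a non-zero power of two
  assert(llvm::alignTo(13, A) == 16); // next multiple of 8 at or above 13
  assert(llvm::alignTo(16, A) == 16); // already a multiple of 8
}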
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1945
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
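A hedged sketch combining the count_if and find_if range wrappers listed above (firstLargeOrEvenCount is a hypothetical helper, not part of this file):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical helper: return the first element > 10, otherwise the count of even elements.
static int firstLargeOrEvenCount(const llvm::SmallVector<int, 8> &V) {
  auto It = llvm::find_if(V, [](int X) { return X > 10; }); // V.end() if no match
  if (It != V.end())
    return *It;
  return static_cast<int>(llvm::count_if(V, [](int X) { return X % 2 == 0; }));
}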
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
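A hedged sketch of the shuffle mask produced by createSequentialMask, assuming the declaration from llvm/Analysis/VectorUtils.h (sequentialMaskCheck is a hypothetical function name):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

// Hypothetical check: four sequential lanes starting at 2, followed by two undef lanes.
static void sequentialMaskCheck() {
  llvm::SmallVector<int, 16> Mask =
      llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
  assert(Mask.size() == 6);
  assert(Mask[0] == 2 && Mask[3] == 5);   // {2, 3, 4, 5, ...}
  assert(Mask[4] == -1 && Mask[5] == -1); // undef lanes are encoded as -1
}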
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition: Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:458
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type, which is chosen by the caller.
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
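A brief hedged sketch exercising several of the EVT queries listed above (evtDemo is a hypothetical function; it assumes only the ValueTypes.h API shown in these entries):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

// Hypothetical demo of a 4 x i32 fixed-length vector type.
static void evtDemo(llvm::LLVMContext &Ctx) {
  llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
  assert(VT.isVector() && VT.is128BitVector());
  assert(VT.getVectorNumElements() == 4 && VT.getScalarSizeInBits() == 32);
  llvm::EVT FVT = VT.changeVectorElementType(llvm::MVT::f32); // same shape, f32 lanes
  assert(FVT.isFloatingPoint() && FVT.getFixedSizeInBits() == 128);
}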
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:293
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition: KnownBits.h:336
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:804
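A minimal hedged sketch of the KnownBits helpers listed above; knownBitsDemo is a hypothetical function, and the isConstant/getConstant queries it uses are an assumption about the KnownBits API beyond the entries shown here:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

// Hypothetical demo: fully known inputs stay fully known through add and zext.
static void knownBitsDemo() {
  llvm::KnownBits LHS = llvm::KnownBits::makeConstant(llvm::APInt(8, 5));
  llvm::KnownBits RHS = llvm::KnownBits::makeConstant(llvm::APInt(8, 3));
  llvm::KnownBits Sum = llvm::KnownBits::add(LHS, RHS);
  assert(!Sum.isUnknown() && Sum.isConstant() && Sum.getConstant() == 8);
  llvm::KnownBits Wide = Sum.zext(16); // new high bits are known zero
  assert(Wide.getBitWidth() == 16);
}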
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)