1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
158
159void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160 if (VT != PromotedLdStVT) {
162 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163
165 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166 }
167
168 MVT ElemTy = VT.getVectorElementType();
169 if (ElemTy != MVT::f64)
173 if (ElemTy == MVT::i32) {
178 } else {
183 }
192 if (VT.isInteger()) {
196 }
197
198 // Neon does not support vector divide/remainder operations.
207
208 if (!VT.isFloatingPoint() &&
209 VT != MVT::v2i64 && VT != MVT::v1i64)
210 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211 setOperationAction(Opcode, VT, Legal);
212 if (!VT.isFloatingPoint())
213 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214 setOperationAction(Opcode, VT, Legal);
215}
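// In short: the helper above promotes loads/stores of VT to PromotedLdStVT
// when they differ, and, for non-floating-point vectors, marks the saturating
// add/sub operations and (except for the 64-bit-element types v1i64/v2i64)
// abs/min/max as Legal.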
216
217void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
218 addRegisterClass(VT, &ARM::DPRRegClass);
219 addTypeForNEON(VT, MVT::f64);
220}
221
222void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
223 addRegisterClass(VT, &ARM::DPairRegClass);
224 addTypeForNEON(VT, MVT::v2f64);
225}
226
227void ARMTargetLowering::setAllExpand(MVT VT) {
228 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229 setOperationAction(Opc, VT, Expand);
230
231 // We support these really simple operations even on types where all
232 // the actual arithmetic has to be broken down into simpler
233 // operations or turned into library calls.
238}
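// setAllExpand is used below as a blanket "turn everything off" switch, e.g.
// setAllExpand(MVT::f64) when the subtarget lacks double-precision hardware
// (!Subtarget->hasFP64()) and setAllExpand(MVT::bf16) for targets with the
// BF16 extension.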
239
240void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241 LegalizeAction Action) {
242 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245}
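// For example, addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal) further down in
// this file marks EXTLOAD, ZEXTLOAD and SEXTLOAD from v4i8 to v4i32 as legal
// in a single call.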
246
247void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249
250 for (auto VT : IntTypes) {
251 addRegisterClass(VT, &ARM::MQPRRegClass);
281
282 // No native support for these.
292
293 // Vector reductions
303
304 if (!HasMVEFP) {
309 } else {
312 }
313
314 // Pre and Post inc are supported on loads and stores
315 for (unsigned im = (unsigned)ISD::PRE_INC;
321 }
322 }
323
324 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 for (auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
327 if (!HasMVEFP)
328 setAllExpand(VT);
329
330 // These are legal or custom whether we have MVE.fp or not
343
344 // Pre and Post inc are supported on loads and stores
345 for (unsigned im = (unsigned)ISD::PRE_INC;
351 }
352
353 if (HasMVEFP) {
361
362 // No native support for these.
376 }
377 }
378
379 // Custom-expand smaller-than-legal vector reductions to prevent false zero
380 // items from being added.
389
390 // We 'support' these types up to bitcast/load/store level, regardless of
391 // MVE integer-only / float support. Only doing FP data processing on the FP
392 // vector types is inhibited at integer-only level.
393 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
394 for (auto VT : LongTypes) {
395 addRegisterClass(VT, &ARM::MQPRRegClass);
396 setAllExpand(VT);
402 }
404
405 // We can do bitwise operations on v2i64 vectors
406 setOperationAction(ISD::AND, MVT::v2i64, Legal);
407 setOperationAction(ISD::OR, MVT::v2i64, Legal);
408 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
409
410 // It is legal to extload from v4i8 to v4i16 or v4i32.
411 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
412 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
413 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
414
415 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
421
422 // Some truncating stores are legal too.
423 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
424 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
425 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
426
427 // Pre and Post inc on these are legal, given the correct extends
428 for (unsigned im = (unsigned)ISD::PRE_INC;
430 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
435 }
436 }
437
438 // Predicate types
439 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
440 for (auto VT : pTypes) {
441 addRegisterClass(VT, &ARM::VCCRRegClass);
456
457 if (!HasMVEFP) {
462 }
463 }
467 setOperationAction(ISD::OR, MVT::v2i1, Expand);
473
482}
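// Summary of the MVE setup above: the 128-bit integer (v16i8/v8i16/v4i32) and
// float (v8f16/v4f32) vector types live in the MQPR register class, v2i64 and
// v2f64 are supported mostly at the bitcast/load/store level (plus bitwise ops
// on v2i64), and the predicate types (v16i1/v8i1/v4i1/v2i1) live in VCCR.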
483
485 const ARMSubtarget &STI)
486 : TargetLowering(TM), Subtarget(&STI) {
487 RegInfo = Subtarget->getRegisterInfo();
488 Itins = Subtarget->getInstrItineraryData();
489
492
493 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
494 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
495 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
496 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
497 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
498 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
500 }
501
502 if (Subtarget->isTargetMachO()) {
503 // Uses VFP for Thumb libfuncs if available.
504 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
505 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
506 static const struct {
507 const RTLIB::Libcall Op;
508 const char * const Name;
509 const ISD::CondCode Cond;
510 } LibraryCalls[] = {
511 // Single-precision floating-point arithmetic.
512 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
513 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
514 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
515 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
516
517 // Double-precision floating-point arithmetic.
518 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
519 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
520 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
521 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
522
523 // Single-precision comparisons.
524 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
525 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
526 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
527 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
528 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
529 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
530 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
531
532 // Double-precision comparisons.
533 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
534 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
535 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
536 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
537 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
538 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
539 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
540
541 // Floating-point to integer conversions.
542 // i64 conversions are done via library routines even when generating VFP
543 // instructions, so use the same ones.
544 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
545 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
546 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
547 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
548
549 // Conversions between floating types.
550 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
551 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
552
553 // Integer to floating-point conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
557 // e.g., __floatunsidf vs. __floatunssidfvfp.
558 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
559 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
560 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
561 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
562 };
563
564 for (const auto &LC : LibraryCalls) {
565 setLibcallName(LC.Op, LC.Name);
566 if (LC.Cond != ISD::SETCC_INVALID)
567 setCmpLibcallCC(LC.Op, LC.Cond);
568 }
569 }
570 }
571
572 // These libcalls are not available in 32-bit.
573 setLibcallName(RTLIB::SHL_I128, nullptr);
574 setLibcallName(RTLIB::SRL_I128, nullptr);
575 setLibcallName(RTLIB::SRA_I128, nullptr);
576 setLibcallName(RTLIB::MUL_I128, nullptr);
577 setLibcallName(RTLIB::MULO_I64, nullptr);
578 setLibcallName(RTLIB::MULO_I128, nullptr);
579
580 // RTLIB
581 if (Subtarget->isAAPCS_ABI() &&
582 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
583 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
584 static const struct {
585 const RTLIB::Libcall Op;
586 const char * const Name;
587 const CallingConv::ID CC;
588 const ISD::CondCode Cond;
589 } LibraryCalls[] = {
590 // Double-precision floating-point arithmetic helper functions
591 // RTABI chapter 4.1.2, Table 2
592 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596
597 // Double-precision floating-point comparison helper functions
598 // RTABI chapter 4.1.2, Table 3
599 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
600 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
601 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
602 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
603 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
606
607 // Single-precision floating-point arithmetic helper functions
608 // RTABI chapter 4.1.2, Table 4
609 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613
614 // Single-precision floating-point comparison helper functions
615 // RTABI chapter 4.1.2, Table 5
616 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
617 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
618 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
619 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
620 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
623
624 // Floating-point to integer conversions.
625 // RTABI chapter 4.1.2, Table 6
626 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634
635 // Conversions between floating types.
636 // RTABI chapter 4.1.2, Table 7
637 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640
641 // Integer to floating-point conversions.
642 // RTABI chapter 4.1.2, Table 8
643 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651
652 // Long long helper functions
653 // RTABI chapter 4.2, Table 9
654 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658
659 // Integer division functions
660 // RTABI chapter 4.3.1
661 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 };
670
671 for (const auto &LC : LibraryCalls) {
672 setLibcallName(LC.Op, LC.Name);
673 setLibcallCallingConv(LC.Op, LC.CC);
674 if (LC.Cond != ISD::SETCC_INVALID)
675 setCmpLibcallCC(LC.Op, LC.Cond);
676 }
677
678 // EABI dependent RTLIB
679 if (TM.Options.EABIVersion == EABI::EABI4 ||
680 TM.Options.EABIVersion == EABI::EABI5) {
681 static const struct {
682 const RTLIB::Libcall Op;
683 const char *const Name;
684 const CallingConv::ID CC;
685 const ISD::CondCode Cond;
686 } MemOpsLibraryCalls[] = {
687 // Memory operations
688 // RTABI chapter 4.3.4
689 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
692 };
693
694 for (const auto &LC : MemOpsLibraryCalls) {
695 setLibcallName(LC.Op, LC.Name);
696 setLibcallCallingConv(LC.Op, LC.CC);
697 if (LC.Cond != ISD::SETCC_INVALID)
698 setCmpLibcallCC(LC.Op, LC.Cond);
699 }
700 }
701 }
702
703 if (Subtarget->isTargetWindows()) {
704 static const struct {
705 const RTLIB::Libcall Op;
706 const char * const Name;
707 const CallingConv::ID CC;
708 } LibraryCalls[] = {
709 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
710 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
711 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
712 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
713 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
717 };
718
719 for (const auto &LC : LibraryCalls) {
720 setLibcallName(LC.Op, LC.Name);
721 setLibcallCallingConv(LC.Op, LC.CC);
722 }
723 }
724
725 // Use divmod compiler-rt calls for iOS 5.0 and later.
726 if (Subtarget->isTargetMachO() &&
727 !(Subtarget->isTargetIOS() &&
728 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
729 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
730 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
731 }
732
733 // The half <-> float conversion functions are always soft-float on
734 // non-watchOS platforms, but are needed for some targets which use a
735 // hard-float calling convention by default.
736 if (!Subtarget->isTargetWatchABI()) {
737 if (Subtarget->isAAPCS_ABI()) {
738 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
739 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
740 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
741 } else {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
745 }
746 }
747
748 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
749 // a __gnu_ prefix (which is the default).
750 if (Subtarget->isTargetAEABI()) {
751 static const struct {
752 const RTLIB::Libcall Op;
753 const char * const Name;
754 const CallingConv::ID CC;
755 } LibraryCalls[] = {
756 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
757 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
758 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
759 };
760
761 for (const auto &LC : LibraryCalls) {
762 setLibcallName(LC.Op, LC.Name);
763 setLibcallCallingConv(LC.Op, LC.CC);
764 }
765 }
766
767 if (Subtarget->isThumb1Only())
768 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
769 else
770 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
771
772 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
773 Subtarget->hasFPRegs()) {
774 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
775 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
776
781
782 if (!Subtarget->hasVFP2Base())
783 setAllExpand(MVT::f32);
784 if (!Subtarget->hasFP64())
785 setAllExpand(MVT::f64);
786 }
787
788 if (Subtarget->hasFullFP16()) {
789 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
792
795 }
796
797 if (Subtarget->hasBF16()) {
798 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
799 setAllExpand(MVT::bf16);
800 if (!Subtarget->hasFullFP16())
802 }
803
805 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
806 setTruncStoreAction(VT, InnerVT, Expand);
807 addAllExtLoads(VT, InnerVT, Expand);
808 }
809
812
814 }
815
818
821
822 if (Subtarget->hasMVEIntegerOps())
823 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
824
825 // Combine low-overhead loop intrinsics so that we can lower i1 types.
826 if (Subtarget->hasLOB()) {
828 }
829
830 if (Subtarget->hasNEON()) {
831 addDRTypeForNEON(MVT::v2f32);
832 addDRTypeForNEON(MVT::v8i8);
833 addDRTypeForNEON(MVT::v4i16);
834 addDRTypeForNEON(MVT::v2i32);
835 addDRTypeForNEON(MVT::v1i64);
836
837 addQRTypeForNEON(MVT::v4f32);
838 addQRTypeForNEON(MVT::v2f64);
839 addQRTypeForNEON(MVT::v16i8);
840 addQRTypeForNEON(MVT::v8i16);
841 addQRTypeForNEON(MVT::v4i32);
842 addQRTypeForNEON(MVT::v2i64);
843
844 if (Subtarget->hasFullFP16()) {
845 addQRTypeForNEON(MVT::v8f16);
846 addDRTypeForNEON(MVT::v4f16);
847 }
848
849 if (Subtarget->hasBF16()) {
850 addQRTypeForNEON(MVT::v8bf16);
851 addDRTypeForNEON(MVT::v4bf16);
852 }
853 }
854
855 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
856 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
857 // none of Neon, MVE or VFP supports any arithmetic operations on it.
858 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
859 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
860 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
861 // FIXME: Code duplication: FDIV and FREM are expanded always, see
862 // ARMTargetLowering::addTypeForNEON method for details.
863 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
864 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
865 // FIXME: Create unittest.
866 // In other words, find a way to handle the case when "copysign" appears in a
867 // DAG with vector operands.
869 // FIXME: Code duplication: SETCC has custom operation action, see
870 // ARMTargetLowering::addTypeForNEON method for details.
872 // FIXME: Create unittest for FNEG and for FABS.
873 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
874 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
876 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
877 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
878 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
879 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
882 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
885 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
891 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
892 }
893
894 if (Subtarget->hasNEON()) {
895 // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are natively
896 // supported for v4f32.
898 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
899 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
900 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
901 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
904 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
912
913 // Mark v2f32 intrinsics.
915 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
916 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
917 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
918 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
921 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
929
930 // Neon does not support some operations on v1i64 and v2i64 types.
931 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
932 // Custom handling for some quad-vector types to detect VMULL.
933 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
934 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
935 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
936 // Custom handling for some vector types to avoid expensive expansions
937 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
939 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
941 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
942 // a destination type that is wider than the source, nor does
943 // it have a FP_TO_[SU]INT instruction with a narrower destination than
944 // source.
953
956
957 // NEON does not have single instruction CTPOP for vectors with element
958 // types wider than 8 bits. However, custom lowering can leverage the
959 // v8i8/v16i8 vcnt instruction.
966
967 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
968 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
969
970 // NEON does not have single instruction CTTZ for vectors.
972 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
973 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
974 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
975
976 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
977 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
978 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
979 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
980
985
990
994 }
995
996 // NEON only has FMA instructions as of VFP4.
997 if (!Subtarget->hasVFP4Base()) {
998 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
999 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1000 }
1001
1004
1005 // It is legal to extload from v4i8 to v4i16 or v4i32.
1006 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1007 MVT::v2i32}) {
1012 }
1013 }
1014
1015 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1016 MVT::v4i32}) {
1021 }
1022 }
1023
1024 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1031 }
1032 if (Subtarget->hasMVEIntegerOps()) {
1035 ISD::SETCC});
1036 }
1037 if (Subtarget->hasMVEFloatOps()) {
1039 }
1040
1041 if (!Subtarget->hasFP64()) {
1042 // When targeting a floating-point unit with only single-precision
1043 // operations, f64 is legal for the few double-precision instructions which
1044 // are present. However, no double-precision operations other than moves,
1045 // loads and stores are provided by the hardware.
1083 }
1084
1085 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1088 if (Subtarget->hasFullFP16()) {
1091 }
1092 }
1093
1094 if (!Subtarget->hasFP16()) {
1097 }
1098
1100
1101 // ARM does not have floating-point extending loads.
1102 for (MVT VT : MVT::fp_valuetypes()) {
1103 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1104 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1105 }
1106
1107 // ... or truncating stores
1108 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1109 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1110 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1111
1112 // ARM does not have i1 sign extending load.
1113 for (MVT VT : MVT::integer_valuetypes())
1114 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1115
1116 // ARM supports all 4 flavors of integer indexed load / store.
1117 if (!Subtarget->isThumb1Only()) {
1118 for (unsigned im = (unsigned)ISD::PRE_INC;
1120 setIndexedLoadAction(im, MVT::i1, Legal);
1121 setIndexedLoadAction(im, MVT::i8, Legal);
1122 setIndexedLoadAction(im, MVT::i16, Legal);
1123 setIndexedLoadAction(im, MVT::i32, Legal);
1124 setIndexedStoreAction(im, MVT::i1, Legal);
1125 setIndexedStoreAction(im, MVT::i8, Legal);
1126 setIndexedStoreAction(im, MVT::i16, Legal);
1127 setIndexedStoreAction(im, MVT::i32, Legal);
1128 }
1129 } else {
1130 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1133 }
1134
1139
1142 if (Subtarget->hasDSP()) {
1151 }
1152 if (Subtarget->hasBaseDSP()) {
1155 }
1156
1157 // i64 operation support.
1160 if (Subtarget->isThumb1Only()) {
1163 }
1164 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1165 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1167
1177
1178 // MVE lowers 64-bit shifts to lsll and lsrl,
1179 // assuming that ISD::SRL and SRA of i64 are already marked Custom.
1180 if (Subtarget->hasMVEIntegerOps())
1182
1183 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1184 if (Subtarget->isThumb1Only()) {
1188 }
1189
1190 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1192
1193 // ARM does not have ROTL.
1198 }
1201 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1204 }
1205
1206 // @llvm.readcyclecounter requires the Performance Monitors extension.
1207 // Default to the 0 expansion on unsupported platforms.
1208 // FIXME: Technically there are older ARM CPUs that have
1209 // implementation-specific ways of obtaining this information.
1210 if (Subtarget->hasPerfMon())
1212
1213 // Only ARMv6 has BSWAP.
1214 if (!Subtarget->hasV6Ops())
1216
1217 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1218 : Subtarget->hasDivideInARMMode();
1219 if (!hasDivide) {
1220 // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1223 }
1224
1225 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1228
1231 }
1232
1235
1236 // Register based DivRem for AEABI (RTABI 4.2)
1237 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1238 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1239 Subtarget->isTargetWindows()) {
1242 HasStandaloneRem = false;
1243
1244 if (Subtarget->isTargetWindows()) {
1245 const struct {
1246 const RTLIB::Libcall Op;
1247 const char * const Name;
1248 const CallingConv::ID CC;
1249 } LibraryCalls[] = {
1250 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1251 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1252 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1253 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1254
1255 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1256 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1257 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1259 };
1260
1261 for (const auto &LC : LibraryCalls) {
1262 setLibcallName(LC.Op, LC.Name);
1263 setLibcallCallingConv(LC.Op, LC.CC);
1264 }
1265 } else {
1266 const struct {
1267 const RTLIB::Libcall Op;
1268 const char * const Name;
1269 const CallingConv::ID CC;
1270 } LibraryCalls[] = {
1271 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1272 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1273 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1274 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1275
1276 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1277 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1278 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1280 };
1281
1282 for (const auto &LC : LibraryCalls) {
1283 setLibcallName(LC.Op, LC.Name);
1284 setLibcallCallingConv(LC.Op, LC.CC);
1285 }
1286 }
1287
1292 } else {
1295 }
1296
1297 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1298 // MSVCRT doesn't have powi; fall back to pow
1299 setLibcallName(RTLIB::POWI_F32, nullptr);
1300 setLibcallName(RTLIB::POWI_F64, nullptr);
1301 }
1302
1307
1308 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1310
1311 // Use the default implementation.
1313 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1315 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1318
1319 if (Subtarget->isTargetWindows())
1321 else
1323
1324 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1325 // the default expansion.
1326 InsertFencesForAtomic = false;
1327 if (Subtarget->hasAnyDataBarrier() &&
1328 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1329 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1330 // to ldrex/strex loops already.
1332 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1334
1335 // On v8, we have particularly efficient implementations of atomic fences
1336 // if they can be combined with nearby atomic loads and stores.
1337 if (!Subtarget->hasAcquireRelease() ||
1338 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1339 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1340 InsertFencesForAtomic = true;
1341 }
1342 } else {
1343 // If there's anything we can use as a barrier, go through custom lowering
1344 // for ATOMIC_FENCE.
1345 // If the target has DMB in Thumb mode, fences can be inserted.
1346 if (Subtarget->hasDataBarrier())
1347 InsertFencesForAtomic = true;
1348
1350 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1351
1352 // Set them all for libcall, which will force libcalls.
1365 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1366 // Unordered/Monotonic case.
1367 if (!InsertFencesForAtomic) {
1370 }
1371 }
1372
1373 // Compute supported atomic widths.
1374 if (Subtarget->isTargetLinux() ||
1375 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1376 // For targets where __sync_* routines are reliably available, we use them
1377 // if necessary.
1378 //
1379 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1380 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1381 //
1382 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1383 // such targets should provide __sync_* routines, which use the ARM mode
1384 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1385 // encoding; see ARMISD::MEMBARRIER_MCR.)
1387 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1388 Subtarget->hasForced32BitAtomics()) {
1389 // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1391 } else {
1392 // We can't assume anything about other targets; just use libatomic
1393 // routines.
1395 }
1396
1398
1400
1401 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1402 if (!Subtarget->hasV6Ops()) {
1405 }
1407
1408 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1409 !Subtarget->isThumb1Only()) {
1410 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1411 // iff target supports vfp2.
1421 }
1422
1423 // We want to custom lower some of our intrinsics.
1428 if (Subtarget->useSjLjEH())
1429 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1430
1440 if (Subtarget->hasFullFP16()) {
1444 }
1445
1447
1450 if (Subtarget->hasFullFP16())
1454 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1455
1456 // We don't support sin/cos/fmod/copysign/pow
1465 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1466 !Subtarget->isThumb1Only()) {
1469 }
1472
1473 if (!Subtarget->hasVFP4Base()) {
1476 }
1477
1478 // Various VFP goodness
1479 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1480 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1481 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1484 }
1485
1486 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1487 if (!Subtarget->hasFP16()) {
1490 }
1491
1492 // Strict floating-point comparisons need custom lowering.
1499 }
1500
1501 // Use __sincos_stret if available.
1502 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1503 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1506 }
1507
1508 // FP-ARMv8 implements a lot of rounding-like FP operations.
1509 if (Subtarget->hasFPARMv8Base()) {
1518 if (Subtarget->hasNEON()) {
1523 }
1524
1525 if (Subtarget->hasFP64()) {
1534 }
1535 }
1536
1537 // FP16 values often need to be promoted to call library functions.
1538 if (Subtarget->hasFullFP16()) {
1552
1554 }
1555
1556 if (Subtarget->hasNEON()) {
1557 // vmin and vmax aren't available in a scalar form, so we can use
1558 // a NEON instruction with an undef lane instead.
1567
1568 if (Subtarget->hasFullFP16()) {
1573
1578 }
1579 }
1580
1581 // We have target-specific dag combine patterns for the following nodes:
1582 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1585
1586 if (Subtarget->hasMVEIntegerOps())
1588
1589 if (Subtarget->hasV6Ops())
1591 if (Subtarget->isThumb1Only())
1593 // Attempt to lower smin/smax to ssat/usat
1594 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1595 Subtarget->isThumb2()) {
1597 }
1598
1600
1601 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1602 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1604 else
1606
1607 //// temporary - rewrite interface to use type
1610 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1612 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1614
1615 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1616 // are at least 4 bytes aligned.
1618
1619 // Prefer likely predicted branches to selects on out-of-order cores.
1620 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1621
1622 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1624
1625 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1626
1627 if (Subtarget->isThumb() || Subtarget->isThumb2())
1629}
1630
1632 return Subtarget->useSoftFloat();
1633}
1634
1635// FIXME: It might make sense to define the representative register class as the
1636// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1637 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1638// SPR's representative would be DPR_VFP2. This should work well if register
1639// pressure tracking were modified such that a register use would increment the
1640 // pressure of the register class's representative and all of its super
1641// classes' representatives transitively. We have not implemented this because
1642// of the difficulty prior to coalescing of modeling operand register classes
1643// due to the common occurrence of cross class copies and subregister insertions
1644// and extractions.
1645std::pair<const TargetRegisterClass *, uint8_t>
1647 MVT VT) const {
1648 const TargetRegisterClass *RRC = nullptr;
1649 uint8_t Cost = 1;
1650 switch (VT.SimpleTy) {
1651 default:
1653 // Use DPR as representative register class for all floating point
1654 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1655 // the cost is 1 for both f32 and f64.
1656 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1657 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1658 RRC = &ARM::DPRRegClass;
1659 // When NEON is used for SP, only half of the register file is available
1660 // because operations that define both SP and DP results will be constrained
1661 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1662 // coalescing by double-counting the SP regs. See the FIXME above.
1663 if (Subtarget->useNEONForSinglePrecisionFP())
1664 Cost = 2;
1665 break;
1666 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1667 case MVT::v4f32: case MVT::v2f64:
1668 RRC = &ARM::DPRRegClass;
1669 Cost = 2;
1670 break;
1671 case MVT::v4i64:
1672 RRC = &ARM::DPRRegClass;
1673 Cost = 4;
1674 break;
1675 case MVT::v8i64:
1676 RRC = &ARM::DPRRegClass;
1677 Cost = 8;
1678 break;
1679 }
1680 return std::make_pair(RRC, Cost);
1681}
1682
1683const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1684#define MAKE_CASE(V) \
1685 case V: \
1686 return #V;
1687 switch ((ARMISD::NodeType)Opcode) {
1689 break;
1893#undef MAKE_CASE
1894 }
1895 return nullptr;
1896}
1897
1899 EVT VT) const {
1900 if (!VT.isVector())
1901 return getPointerTy(DL);
1902
1903 // MVE has a predicate register.
1904 if ((Subtarget->hasMVEIntegerOps() &&
1905 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1906 VT == MVT::v16i8)) ||
1907 (Subtarget->hasMVEFloatOps() &&
1908 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1909 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1911}
1912
1913/// getRegClassFor - Return the register class that should be used for the
1914/// specified value type.
1915const TargetRegisterClass *
1916ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1917 (void)isDivergent;
1918 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1919 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1920 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1921 // MVE Q registers.
1922 if (Subtarget->hasNEON()) {
1923 if (VT == MVT::v4i64)
1924 return &ARM::QQPRRegClass;
1925 if (VT == MVT::v8i64)
1926 return &ARM::QQQQPRRegClass;
1927 }
1928 if (Subtarget->hasMVEIntegerOps()) {
1929 if (VT == MVT::v4i64)
1930 return &ARM::MQQPRRegClass;
1931 if (VT == MVT::v8i64)
1932 return &ARM::MQQQQPRRegClass;
1933 }
1935}
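// Example: a v4i64 REG_SEQUENCE holding four consecutive NEON D registers (or
// two MVE Q registers) is assigned QQPR/MQQPR here even though v4i64 itself is
// never a legal type.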
1936
1937 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1938// source/dest is aligned and the copy size is large enough. We therefore want
1939// to align such objects passed to memory intrinsics.
1941 Align &PrefAlign) const {
1942 if (!isa<MemIntrinsic>(CI))
1943 return false;
1944 MinSize = 8;
1945 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1946 // cycle faster than 4-byte aligned LDM.
1947 PrefAlign =
1948 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1949 return true;
1950}
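// Net effect (roughly): pointer arguments to memory intrinsics of at least 8
// bytes are aligned to 8 bytes on v6+ A/R-class cores and to 4 bytes
// elsewhere, so the resulting LDM/STM sequences hit the faster aligned case.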
1951
1952// Create a fast isel object.
1953FastISel *
1955 const TargetLibraryInfo *libInfo) const {
1956 return ARM::createFastISel(funcInfo, libInfo);
1957}
1958
1960 unsigned NumVals = N->getNumValues();
1961 if (!NumVals)
1962 return Sched::RegPressure;
1963
1964 for (unsigned i = 0; i != NumVals; ++i) {
1965 EVT VT = N->getValueType(i);
1966 if (VT == MVT::Glue || VT == MVT::Other)
1967 continue;
1968 if (VT.isFloatingPoint() || VT.isVector())
1969 return Sched::ILP;
1970 }
1971
1972 if (!N->isMachineOpcode())
1973 return Sched::RegPressure;
1974
1975 // Loads are scheduled for latency even if the instruction itinerary
1976 // is not available.
1977 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1978 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1979
1980 if (MCID.getNumDefs() == 0)
1981 return Sched::RegPressure;
1982 if (!Itins->isEmpty() &&
1983 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1984 return Sched::ILP;
1985
1986 return Sched::RegPressure;
1987}
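// In short, the heuristic above prefers ILP scheduling for nodes that produce
// floating-point or vector values, or whose first result takes more than two
// cycles per the itinerary; everything else uses register-pressure scheduling.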
1988
1989//===----------------------------------------------------------------------===//
1990// Lowering Code
1991//===----------------------------------------------------------------------===//
1992
1993static bool isSRL16(const SDValue &Op) {
1994 if (Op.getOpcode() != ISD::SRL)
1995 return false;
1996 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1997 return Const->getZExtValue() == 16;
1998 return false;
1999}
2000
2001static bool isSRA16(const SDValue &Op) {
2002 if (Op.getOpcode() != ISD::SRA)
2003 return false;
2004 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2005 return Const->getZExtValue() == 16;
2006 return false;
2007}
2008
2009static bool isSHL16(const SDValue &Op) {
2010 if (Op.getOpcode() != ISD::SHL)
2011 return false;
2012 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2013 return Const->getZExtValue() == 16;
2014 return false;
2015}
2016
2017 // Check for a signed 16-bit value. We special-case SRA because it makes
2018 // things simpler when also looking for SRAs that aren't sign-extending a
2019// smaller value. Without the check, we'd need to take extra care with
2020// checking order for some operations.
2021static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2022 if (isSRA16(Op))
2023 return isSHL16(Op.getOperand(0));
2024 return DAG.ComputeNumSignBits(Op) == 17;
2025}
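// Example: (sra (shl X, 16), 16) sign-extends the low 16 bits of X, so it
// counts as a signed 16-bit value; otherwise the DAG must compute exactly 17
// sign bits for the i32 value, i.e. it fits in the low 16 bits as a signed
// quantity.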
2026
2027/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2029 switch (CC) {
2030 default: llvm_unreachable("Unknown condition code!");
2031 case ISD::SETNE: return ARMCC::NE;
2032 case ISD::SETEQ: return ARMCC::EQ;
2033 case ISD::SETGT: return ARMCC::GT;
2034 case ISD::SETGE: return ARMCC::GE;
2035 case ISD::SETLT: return ARMCC::LT;
2036 case ISD::SETLE: return ARMCC::LE;
2037 case ISD::SETUGT: return ARMCC::HI;
2038 case ISD::SETUGE: return ARMCC::HS;
2039 case ISD::SETULT: return ARMCC::LO;
2040 case ISD::SETULE: return ARMCC::LS;
2041 }
2042}
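// Example: the unsigned comparison ISD::SETUGT maps to the ARM "HI" (unsigned
// higher) condition, while its signed counterpart ISD::SETGT maps to "GT".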
2043
2044/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2046 ARMCC::CondCodes &CondCode2) {
2047 CondCode2 = ARMCC::AL;
2048 switch (CC) {
2049 default: llvm_unreachable("Unknown FP condition!");
2050 case ISD::SETEQ:
2051 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2052 case ISD::SETGT:
2053 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2054 case ISD::SETGE:
2055 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2056 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2057 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2058 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2059 case ISD::SETO: CondCode = ARMCC::VC; break;
2060 case ISD::SETUO: CondCode = ARMCC::VS; break;
2061 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2062 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2063 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2064 case ISD::SETLT:
2065 case ISD::SETULT: CondCode = ARMCC::LT; break;
2066 case ISD::SETLE:
2067 case ISD::SETULE: CondCode = ARMCC::LE; break;
2068 case ISD::SETNE:
2069 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2070 }
2071}
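// Example: SETONE (ordered and not equal) has no single ARM condition, so it
// comes back as the pair {MI, GT}; when CondCode2 != ARMCC::AL the caller is
// expected to test both conditions.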
2072
2073//===----------------------------------------------------------------------===//
2074// Calling Convention Implementation
2075//===----------------------------------------------------------------------===//
2076
2077/// getEffectiveCallingConv - Get the effective calling convention, taking into
2078 /// account the presence of floating-point hardware and calling convention
2079/// limitations, such as support for variadic functions.
2081ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2082 bool isVarArg) const {
2083 switch (CC) {
2084 default:
2085 report_fatal_error("Unsupported calling convention");
2088 case CallingConv::GHC:
2090 return CC;
2096 case CallingConv::Swift:
2099 case CallingConv::C:
2100 case CallingConv::Tail:
2101 if (!Subtarget->isAAPCS_ABI())
2102 return CallingConv::ARM_APCS;
2103 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2104 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2105 !isVarArg)
2107 else
2109 case CallingConv::Fast:
2111 if (!Subtarget->isAAPCS_ABI()) {
2112 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2113 return CallingConv::Fast;
2114 return CallingConv::ARM_APCS;
2115 } else if (Subtarget->hasVFP2Base() &&
2116 !Subtarget->isThumb1Only() && !isVarArg)
2118 else
2120 }
2121}
2122
2124 bool isVarArg) const {
2125 return CCAssignFnForNode(CC, false, isVarArg);
2126}
2127
2129 bool isVarArg) const {
2130 return CCAssignFnForNode(CC, true, isVarArg);
2131}
2132
2133/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2134/// CallingConvention.
2135CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2136 bool Return,
2137 bool isVarArg) const {
2138 switch (getEffectiveCallingConv(CC, isVarArg)) {
2139 default:
2140 report_fatal_error("Unsupported calling convention");
2142 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2144 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2146 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2147 case CallingConv::Fast:
2148 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2149 case CallingConv::GHC:
2150 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2152 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2154 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2156 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2157 }
2158}
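// For instance, a hard-float AAPCS call ends up with CC_ARM_AAPCS_VFP for
// argument assignment and RetCC_ARM_AAPCS_VFP for results, whereas GHC calls
// use the dedicated CC_ARM_APCS_GHC table for arguments.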
2159
2160SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2161 MVT LocVT, MVT ValVT, SDValue Val) const {
2162 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2163 Val);
2164 if (Subtarget->hasFullFP16()) {
2165 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2166 } else {
2167 Val = DAG.getNode(ISD::TRUNCATE, dl,
2168 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2169 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2170 }
2171 return Val;
2172}
2173
2174SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2175 MVT LocVT, MVT ValVT,
2176 SDValue Val) const {
2177 if (Subtarget->hasFullFP16()) {
2178 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2179 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2180 } else {
2181 Val = DAG.getNode(ISD::BITCAST, dl,
2182 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2183 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2184 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2185 }
2186 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2187}
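// MoveToHPR/MoveFromHPR implement the f16/bf16 argument rule used by
// LowerCallResult below: the value travels through the calling convention
// widened to 32 bits and is moved into or out of a half-precision register,
// using VMOVhr/VMOVrh when full fp16 is available and integer truncate/extend
// plus bitcasts otherwise.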
2188
2189/// LowerCallResult - Lower the result values of a call into the
2190/// appropriate copies out of appropriate physical registers.
2191SDValue ARMTargetLowering::LowerCallResult(
2192 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2193 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2194 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2195 SDValue ThisVal) const {
2196 // Assign locations to each value returned by this call.
2198 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2199 *DAG.getContext());
2200 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2201
2202 // Copy all of the result registers out of their specified physreg.
2203 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2204 CCValAssign VA = RVLocs[i];
2205
2206 // Pass 'this' value directly from the argument to return value, to avoid
2207 // reg unit interference
2208 if (i == 0 && isThisReturn) {
2209 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2210 "unexpected return calling convention register assignment");
2211 InVals.push_back(ThisVal);
2212 continue;
2213 }
2214
2215 SDValue Val;
2216 if (VA.needsCustom() &&
2217 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2218 // Handle f64 or half of a v2f64.
2219 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2220 InGlue);
2221 Chain = Lo.getValue(1);
2222 InGlue = Lo.getValue(2);
2223 VA = RVLocs[++i]; // skip ahead to next loc
2224 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2225 InGlue);
2226 Chain = Hi.getValue(1);
2227 InGlue = Hi.getValue(2);
2228 if (!Subtarget->isLittle())
2229 std::swap (Lo, Hi);
2230 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2231
2232 if (VA.getLocVT() == MVT::v2f64) {
2233 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2234 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2235 DAG.getConstant(0, dl, MVT::i32));
2236
2237 VA = RVLocs[++i]; // skip ahead to next loc
2238 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2239 Chain = Lo.getValue(1);
2240 InGlue = Lo.getValue(2);
2241 VA = RVLocs[++i]; // skip ahead to next loc
2242 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2243 Chain = Hi.getValue(1);
2244 InGlue = Hi.getValue(2);
2245 if (!Subtarget->isLittle())
2246 std::swap (Lo, Hi);
2247 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2248 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2249 DAG.getConstant(1, dl, MVT::i32));
2250 }
2251 } else {
2252 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2253 InGlue);
2254 Chain = Val.getValue(1);
2255 InGlue = Val.getValue(2);
2256 }
2257
2258 switch (VA.getLocInfo()) {
2259 default: llvm_unreachable("Unknown loc info!");
2260 case CCValAssign::Full: break;
2261 case CCValAssign::BCvt:
2262 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2263 break;
2264 }
2265
2266 // f16 arguments have their size extended to 4 bytes and passed as if they
2267 // had been copied to the LSBs of a 32-bit register.
2268 // For that, they're passed extended to i32 (soft ABI) or to f32 (hard ABI).
2269 if (VA.needsCustom() &&
2270 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2271 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2272
2273 InVals.push_back(Val);
2274 }
2275
2276 return Chain;
2277}
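// Note how f64 and v2f64 results are returned: each f64 arrives as two glued
// i32 register copies that are reassembled with ARMISD::VMOVDRR (swapping
// halves on big-endian), and a v2f64 is built up one f64 lane at a time with
// INSERT_VECTOR_ELT.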
2278
2279std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2280 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2281 bool IsTailCall, int SPDiff) const {
2282 SDValue DstAddr;
2283 MachinePointerInfo DstInfo;
2284 int32_t Offset = VA.getLocMemOffset();
2286
2287 if (IsTailCall) {
2288 Offset += SPDiff;
2289 auto PtrVT = getPointerTy(DAG.getDataLayout());
2290 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2291 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2292 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2293 DstInfo =
2295 } else {
2296 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2297 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2298 StackPtr, PtrOff);
2299 DstInfo =
2301 }
2302
2303 return std::make_pair(DstAddr, DstInfo);
2304}
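// For tail calls, the outgoing argument address is a fixed frame index offset
// by SPDiff (the difference between the caller's and callee's argument areas);
// for ordinary calls it is simply SP plus the location's stack offset.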
2305
2306void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2307 SDValue Chain, SDValue &Arg,
2308 RegsToPassVector &RegsToPass,
2309 CCValAssign &VA, CCValAssign &NextVA,
2310 SDValue &StackPtr,
2311 SmallVectorImpl<SDValue> &MemOpChains,
2312 bool IsTailCall,
2313 int SPDiff) const {
2314 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2315 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2316 unsigned id = Subtarget->isLittle() ? 0 : 1;
2317 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2318
2319 if (NextVA.isRegLoc())
2320 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2321 else {
2322 assert(NextVA.isMemLoc());
2323 if (!StackPtr.getNode())
2324 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2326
2327 SDValue DstAddr;
2328 MachinePointerInfo DstInfo;
2329 std::tie(DstAddr, DstInfo) =
2330 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2331 MemOpChains.push_back(
2332 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2333 }
2334}
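// The ARMISD::VMOVRRD above splits the f64 argument into two i32 halves
// (endian-dependent order); the first half always goes in VA's register and
// the second goes either in NextVA's register or to its stack slot via
// computeAddrForCallArg.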
2335
2336static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2337 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2339}
2340
2341/// LowerCall - Lowering a call into a callseq_start <-
2342/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2343/// nodes.
2344SDValue
2345ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2346 SmallVectorImpl<SDValue> &InVals) const {
2347 SelectionDAG &DAG = CLI.DAG;
2348 SDLoc &dl = CLI.DL;
2350 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2352 SDValue Chain = CLI.Chain;
2353 SDValue Callee = CLI.Callee;
2354 bool &isTailCall = CLI.IsTailCall;
2355 CallingConv::ID CallConv = CLI.CallConv;
2356 bool doesNotRet = CLI.DoesNotReturn;
2357 bool isVarArg = CLI.IsVarArg;
2358
2362 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2363 bool isThisReturn = false;
2364 bool isCmseNSCall = false;
2365 bool isSibCall = false;
2366 bool PreferIndirect = false;
2367 bool GuardWithBTI = false;
2368
2369 // Lower 'returns_twice' calls to a pseudo-instruction.
2370 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2371 !Subtarget->noBTIAtReturnTwice())
2372 GuardWithBTI = AFI->branchTargetEnforcement();
2373
2374 // Determine whether this is a non-secure function call.
2375 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2376 isCmseNSCall = true;
2377
2378 // Disable tail calls if they're not supported.
2379 if (!Subtarget->supportsTailCall())
2380 isTailCall = false;
2381
2382 // For both the non-secure calls and the returns from a CMSE entry function,
2383 // the function needs to do some extra work after the call, or before the
2384 // return, respectively, so it cannot end with a tail call.
2385 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2386 isTailCall = false;
2387
2388 if (isa<GlobalAddressSDNode>(Callee)) {
2389 // If we're optimizing for minimum size and the function is called three or
2390 // more times in this block, we can improve codesize by calling indirectly
2391 // as BLXr has a 16-bit encoding.
2392 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2393 if (CLI.CB) {
2394 auto *BB = CLI.CB->getParent();
2395 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2396 count_if(GV->users(), [&BB](const User *U) {
2397 return isa<Instruction>(U) &&
2398 cast<Instruction>(U)->getParent() == BB;
2399 }) > 2;
2400 }
2401 }
2402 if (isTailCall) {
2403 // Check if it's really possible to do a tail call.
2404 isTailCall = IsEligibleForTailCallOptimization(
2405 Callee, CallConv, isVarArg, isStructRet,
2406 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2407 PreferIndirect);
2408
2409 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2410 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2411 isSibCall = true;
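// A sibcall reuses the caller's argument area unchanged: NumBytes is forced
// to 0 and SPDiff stays 0 below. Guaranteed tail calls (Tail, SwiftTail, or
// -tailcallopt) may instead grow or shrink the argument area via SPDiff.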
2412
2413 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2414 // detected sibcalls.
2415 if (isTailCall)
2416 ++NumTailCalls;
2417 }
2418
2419 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2420 report_fatal_error("failed to perform tail call elimination on a call "
2421 "site marked musttail");
2422 // Analyze operands of the call, assigning locations to each operand.
2423 SmallVector<CCValAssign, 16> ArgLocs;
2424 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2425 *DAG.getContext());
2426 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2427
2428 // Get a count of how many bytes are to be pushed on the stack.
2429 unsigned NumBytes = CCInfo.getStackSize();
2430
2431 // SPDiff is the byte offset of the call's argument area from the callee's.
2432 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2433 // by this amount for a tail call. In a sibling call it must be 0 because the
2434 // caller will deallocate the entire stack and the callee still expects its
2435 // arguments to begin at SP+0. Completely unused for non-tail calls.
2436 int SPDiff = 0;
2437
2438 if (isTailCall && !isSibCall) {
2439 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2440 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2441
2442 // Since callee will pop argument stack as a tail call, we must keep the
2443 // popped size 16-byte aligned.
2444 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2445 NumBytes = alignTo(NumBytes, StackAlign);
2446
2447 // SPDiff will be negative if this tail call requires more space than we
2448 // would automatically have in our incoming argument space. Positive if we
2449 // can actually shrink the stack.
2450 SPDiff = NumReusableBytes - NumBytes;
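// For instance, with 16 bytes of reusable incoming argument space and 24
// bytes needed after alignment, SPDiff is -8 and the extra 8 bytes are
// reserved through setArgRegsSaveSize below.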
2451
2452 // If this call requires more stack than we have available from
2453 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2454 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2455 AFI->setArgRegsSaveSize(-SPDiff);
2456 }
2457
2458 if (isSibCall) {
2459 // For sibling tail calls, memory operands are available in our caller's stack.
2460 NumBytes = 0;
2461 } else {
2462 // Adjust the stack pointer for the new arguments...
2463 // These operations are automatically eliminated by the prolog/epilog pass
2464 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2465 }
2466
2467 SDValue StackPtr =
2468 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2469
2470 RegsToPassVector RegsToPass;
2471 SmallVector<SDValue, 8> MemOpChains;
2472
2473 // During a tail call, stores to the argument area must happen after all of
2474 // the function's incoming arguments have been loaded because they may alias.
2475 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2476 // there's no point in doing so repeatedly so this tracks whether that's
2477 // happened yet.
2478 bool AfterFormalArgLoads = false;
2479
2480 // Walk the register/memloc assignments, inserting copies/loads. In the case
2481 // of tail call optimization, arguments are handled later.
2482 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2483 i != e;
2484 ++i, ++realArgIdx) {
2485 CCValAssign &VA = ArgLocs[i];
2486 SDValue Arg = OutVals[realArgIdx];
2487 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2488 bool isByVal = Flags.isByVal();
2489
2490 // Promote the value if needed.
2491 switch (VA.getLocInfo()) {
2492 default: llvm_unreachable("Unknown loc info!");
2493 case CCValAssign::Full: break;
2494 case CCValAssign::SExt:
2495 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2496 break;
2497 case CCValAssign::ZExt:
2498 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2499 break;
2500 case CCValAssign::AExt:
2501 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2502 break;
2503 case CCValAssign::BCvt:
2504 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2505 break;
2506 }
2507
2508 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2509 Chain = DAG.getStackArgumentTokenFactor(Chain);
2510 AfterFormalArgLoads = true;
2511 }
2512
2513 // f16 arguments have their size extended to 4 bytes and passed as if they
2514 // had been copied to the LSBs of a 32-bit register.
2515 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2516 if (VA.needsCustom() &&
2517 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2518 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2519 } else {
2520 // f16 arguments could have been extended prior to argument lowering.
2521 // Mask such arguments if this is a CMSE nonsecure call.
2522 auto ArgVT = Outs[realArgIdx].ArgVT;
2523 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2524 auto LocBits = VA.getLocVT().getSizeInBits();
2525 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2526 SDValue Mask =
2527 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2528 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2529 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2530 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2531 }
2532 }
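// For CMSE non-secure calls the AND above clears every bit that does not
// belong to the f16 argument, so stale register contents cannot leak across
// the security-state boundary.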
2533
2534 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2535 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2536 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2537 DAG.getConstant(0, dl, MVT::i32));
2538 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2539 DAG.getConstant(1, dl, MVT::i32));
2540
2541 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2542 StackPtr, MemOpChains, isTailCall, SPDiff);
2543
2544 VA = ArgLocs[++i]; // skip ahead to next loc
2545 if (VA.isRegLoc()) {
2546 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2547 StackPtr, MemOpChains, isTailCall, SPDiff);
2548 } else {
2549 assert(VA.isMemLoc());
2550 SDValue DstAddr;
2551 MachinePointerInfo DstInfo;
2552 std::tie(DstAddr, DstInfo) =
2553 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2554 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2555 }
2556 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2557 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2558 StackPtr, MemOpChains, isTailCall, SPDiff);
2559 } else if (VA.isRegLoc()) {
2560 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2561 Outs[0].VT == MVT::i32) {
2562 assert(VA.getLocVT() == MVT::i32 &&
2563 "unexpected calling convention register assignment");
2564 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2565 "unexpected use of 'returned'");
2566 isThisReturn = true;
2567 }
2568 const TargetOptions &Options = DAG.getTarget().Options;
2569 if (Options.EmitCallSiteInfo)
2570 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2571 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2572 } else if (isByVal) {
2573 assert(VA.isMemLoc());
2574 unsigned offset = 0;
2575
2576 // True if this byval aggregate will be split between registers
2577 // and memory.
2578 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2579 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2580
2581 if (CurByValIdx < ByValArgsCount) {
2582
2583 unsigned RegBegin, RegEnd;
2584 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2585
2586 EVT PtrVT =
2587 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2588 unsigned int i, j;
2589 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2590 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2591 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2592 SDValue Load =
2593 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2594 DAG.InferPtrAlign(AddArg));
2595 MemOpChains.push_back(Load.getValue(1));
2596 RegsToPass.push_back(std::make_pair(j, Load));
2597 }
2598
2599 // If the parameter size exceeds the register area, the "offset" value
2600 // helps us calculate the stack slot for the remaining part properly.
2601 offset = RegEnd - RegBegin;
2602
2603 CCInfo.nextInRegsParam();
2604 }
2605
2606 if (Flags.getByValSize() > 4*offset) {
2607 auto PtrVT = getPointerTy(DAG.getDataLayout());
2608 SDValue Dst;
2609 MachinePointerInfo DstInfo;
2610 std::tie(Dst, DstInfo) =
2611 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2612 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2613 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2614 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2615 MVT::i32);
2616 SDValue AlignNode =
2617 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2618
2619 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2620 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2621 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2622 Ops));
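// ARMISD::COPY_STRUCT_BYVAL copies the portion of the byval that did not fit
// in registers to its stack slot; it is later expanded into an inline copy
// (looped for large sizes, counted by the NumLoopByVals statistic).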
2623 }
2624 } else {
2625 assert(VA.isMemLoc());
2626 SDValue DstAddr;
2627 MachinePointerInfo DstInfo;
2628 std::tie(DstAddr, DstInfo) =
2629 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2630
2631 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2632 MemOpChains.push_back(Store);
2633 }
2634 }
2635
2636 if (!MemOpChains.empty())
2637 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2638
2639 // Build a sequence of copy-to-reg nodes chained together with token chain
2640 // and flag operands which copy the outgoing args into the appropriate regs.
2641 SDValue InGlue;
2642 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2643 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2644 RegsToPass[i].second, InGlue);
2645 InGlue = Chain.getValue(1);
2646 }
2647
2648 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2649 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2650 // node so that legalize doesn't hack it.
2651 bool isDirect = false;
2652
2653 const TargetMachine &TM = getTargetMachine();
2654 const GlobalValue *GVal = nullptr;
2655 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2656 GVal = G->getGlobal();
2657 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2658
2659 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2660 bool isLocalARMFunc = false;
2661 auto PtrVt = getPointerTy(DAG.getDataLayout());
2662
2663 if (Subtarget->genLongCalls()) {
2664 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2665 "long-calls codegen is not position independent!");
2666 // Handle a global address or an external symbol. If it's not one of
2667 // those, the target's already in a register, so we don't need to do
2668 // anything extra.
2669 if (isa<GlobalAddressSDNode>(Callee)) {
2670 if (Subtarget->genExecuteOnly()) {
2671 if (Subtarget->useMovt())
2672 ++NumMovwMovt;
2673 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2674 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2675 } else {
2676 // Create a constant pool entry for the callee address
2677 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2678 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2679 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2680
2681 // Get the address of the callee into a register
2682 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2683 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2684 Callee = DAG.getLoad(
2685 PtrVt, dl, DAG.getEntryNode(), Addr,
2686 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2687 }
2688 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2689 const char *Sym = S->getSymbol();
2690
2691 if (Subtarget->genExecuteOnly()) {
2692 if (Subtarget->useMovt())
2693 ++NumMovwMovt;
2694 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2695 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2696 } else {
2697 // Create a constant pool entry for the callee address
2698 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2699 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2700 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2701
2702 // Get the address of the callee into a register
2703 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2704 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2705 Callee = DAG.getLoad(
2706 PtrVt, dl, DAG.getEntryNode(), Addr,
2708 }
2709 }
2710 } else if (isa<GlobalAddressSDNode>(Callee)) {
2711 if (!PreferIndirect) {
2712 isDirect = true;
2713 bool isDef = GVal->isStrongDefinitionForLinker();
2714
2715 // ARM call to a local ARM function is predicable.
2716 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2717 // tBX takes a register source operand.
2718 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2719 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2720 Callee = DAG.getNode(
2721 ARMISD::WrapperPIC, dl, PtrVt,
2722 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2723 Callee = DAG.getLoad(
2724 PtrVt, dl, DAG.getEntryNode(), Callee,
2728 } else if (Subtarget->isTargetCOFF()) {
2729 assert(Subtarget->isTargetWindows() &&
2730 "Windows is the only supported COFF target");
2731 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2732 if (GVal->hasDLLImportStorageClass())
2733 TargetFlags = ARMII::MO_DLLIMPORT;
2734 else if (!TM.shouldAssumeDSOLocal(GVal))
2735 TargetFlags = ARMII::MO_COFFSTUB;
2736 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2737 TargetFlags);
2738 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2739 Callee =
2740 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2741 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2743 } else {
2744 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2745 }
2746 }
2747 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2748 isDirect = true;
2749 // tBX takes a register source operand.
2750 const char *Sym = S->getSymbol();
2751 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2752 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2755 ARMPCLabelIndex, 4);
2756 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2757 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2758 Callee = DAG.getLoad(
2759 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2760 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2761 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2762 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2763 } else {
2764 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2765 }
2766 }
2767
2768 if (isCmseNSCall) {
2769 assert(!isARMFunc && !isDirect &&
2770 "Cannot handle call to ARM function or direct call");
2771 if (NumBytes > 0) {
2772 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2773 "call to non-secure function would "
2774 "require passing arguments on stack",
2775 dl.getDebugLoc());
2776 DAG.getContext()->diagnose(Diag);
2777 }
2778 if (isStructRet) {
2779 DiagnosticInfoUnsupported Diag(
2780 DAG.getMachineFunction().getFunction(),
2781 "call to non-secure function would return value through pointer",
2782 dl.getDebugLoc());
2783 DAG.getContext()->diagnose(Diag);
2784 }
2785 }
2786
2787 // FIXME: handle tail calls differently.
2788 unsigned CallOpc;
2789 if (Subtarget->isThumb()) {
2790 if (GuardWithBTI)
2791 CallOpc = ARMISD::t2CALL_BTI;
2792 else if (isCmseNSCall)
2793 CallOpc = ARMISD::tSECALL;
2794 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2795 CallOpc = ARMISD::CALL_NOLINK;
2796 else
2797 CallOpc = ARMISD::CALL;
2798 } else {
2799 if (!isDirect && !Subtarget->hasV5TOps())
2800 CallOpc = ARMISD::CALL_NOLINK;
2801 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2802 // Emit regular call when code size is the priority
2803 !Subtarget->hasMinSize())
2804 // "mov lr, pc; b _foo" to avoid confusing the RSP
2805 CallOpc = ARMISD::CALL_NOLINK;
2806 else
2807 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2808 }
2809
2810 // We don't usually want to end the call-sequence here because we would tidy
2811 // the frame up *after* the call, however in the ABI-changing tail-call case
2812 // we've carefully laid out the parameters so that when sp is reset they'll be
2813 // in the correct location.
2814 if (isTailCall && !isSibCall) {
2815 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2816 InGlue = Chain.getValue(1);
2817 }
2818
2819 std::vector<SDValue> Ops;
2820 Ops.push_back(Chain);
2821 Ops.push_back(Callee);
2822
2823 if (isTailCall) {
2824 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2825 }
2826
2827 // Add argument registers to the end of the list so that they are known live
2828 // into the call.
2829 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2830 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2831 RegsToPass[i].second.getValueType()));
2832
2833 // Add a register mask operand representing the call-preserved registers.
2834 const uint32_t *Mask;
2835 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2836 if (isThisReturn) {
2837 // For 'this' returns, use the R0-preserving mask if applicable
2838 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2839 if (!Mask) {
2840 // Set isThisReturn to false if the calling convention is not one that
2841 // allows 'returned' to be modeled in this way, so LowerCallResult does
2842 // not try to pass 'this' straight through
2843 isThisReturn = false;
2844 Mask = ARI->getCallPreservedMask(MF, CallConv);
2845 }
2846 } else
2847 Mask = ARI->getCallPreservedMask(MF, CallConv);
2848
2849 assert(Mask && "Missing call preserved mask for calling convention");
2850 Ops.push_back(DAG.getRegisterMask(Mask));
2851
2852 if (InGlue.getNode())
2853 Ops.push_back(InGlue);
2854
2855 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2856 if (isTailCall) {
2857 MF.getFrameInfo().setHasTailCall();
2858 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2859 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2860 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2861 return Ret;
2862 }
2863
2864 // Returns a chain and a flag for retval copy to use.
2865 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2866 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2867 InGlue = Chain.getValue(1);
2868 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2869
2870 // If we're guaranteeing tail-calls will be honoured, the callee must
2871 // pop its own argument stack on return. But this call is *not* a tail call so
2872 // we need to undo that after it returns to restore the status-quo.
2873 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2874 uint64_t CalleePopBytes =
2875 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2876
2877 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2878 if (!Ins.empty())
2879 InGlue = Chain.getValue(1);
2880
2881 // Handle result values, copying them out of physregs into vregs that we
2882 // return.
2883 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2884 InVals, isThisReturn,
2885 isThisReturn ? OutVals[0] : SDValue());
2886}
2887
2888/// HandleByVal - Every parameter *after* a byval parameter is passed
2889/// on the stack. Remember the next parameter register to allocate,
2890 /// and then confiscate the rest of the parameter registers to ensure
2891/// this.
2892void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2893 Align Alignment) const {
2894 // Byval (as with any stack) slots are always at least 4 byte aligned.
2895 Alignment = std::max(Alignment, Align(4));
2896
2897 unsigned Reg = State->AllocateReg(GPRArgRegs);
2898 if (!Reg)
2899 return;
2900
2901 unsigned AlignInRegs = Alignment.value() / 4;
2902 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2903 for (unsigned i = 0; i < Waste; ++i)
2904 Reg = State->AllocateReg(GPRArgRegs);
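// For instance, with an 8-byte alignment and R1 as the first free register,
// AlignInRegs is 2 and Waste is 1, so R1 is skipped and the byval starts in R2.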
2905
2906 if (!Reg)
2907 return;
2908
2909 unsigned Excess = 4 * (ARM::R4 - Reg);
2910
2911 // Special case when NSAA != SP and the parameter size is greater than the
2912 // size of all remaining GPR regs. In that case we can't split the parameter,
2913 // we must send it all to the stack. We also must set the NCRN to R4, thereby
2914 // wasting all remaining registers.
2915 const unsigned NSAAOffset = State->getStackSize();
2916 if (NSAAOffset != 0 && Size > Excess) {
2917 while (State->AllocateReg(GPRArgRegs))
2918 ;
2919 return;
2920 }
2921
2922 // The first register for the byval parameter is the first register that
2923 // wasn't allocated before this method was called, i.e. "Reg".
2924 // If the parameter is small enough to fit in the range [Reg, R4), then the
2925 // end (one past the last) register is Reg + param-size-in-regs; otherwise
2926 // the parameter is split between registers and the stack, and the end
2927 // register is R4.
2928 unsigned ByValRegBegin = Reg;
2929 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2930 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2931 // Note: the first register was already allocated at the beginning of this
2932 // function; allocate the remaining registers we need.
2933 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2934 State->AllocateReg(GPRArgRegs);
2935 // A byval parameter that is split between registers and memory needs its
2936 // size truncated here.
2937 // In the case where the entire structure fits in registers, we set the
2938 // size in memory to zero.
2939 Size = std::max<int>(Size - Excess, 0);
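// For instance, a 16-byte byval whose first free register is R2 is assigned
// [R2, R4) (8 bytes in registers); Excess is 8, so Size is reduced to the 8
// bytes that still have to go on the stack.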
2940}
2941
2942/// MatchingStackOffset - Return true if the given stack call argument is
2943/// already available in the same position (relatively) of the caller's
2944/// incoming argument stack.
2945 static
2946 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2947 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2948 const TargetInstrInfo *TII) {
2949 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2950 int FI = std::numeric_limits<int>::max();
2951 if (Arg.getOpcode() == ISD::CopyFromReg) {
2952 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2953 if (!VR.isVirtual())
2954 return false;
2955 MachineInstr *Def = MRI->getVRegDef(VR);
2956 if (!Def)
2957 return false;
2958 if (!Flags.isByVal()) {
2959 if (!TII->isLoadFromStackSlot(*Def, FI))
2960 return false;
2961 } else {
2962 return false;
2963 }
2964 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2965 if (Flags.isByVal())
2966 // ByVal argument is passed in as a pointer but it's now being
2967 // dereferenced. e.g.
2968 // define @foo(%struct.X* %A) {
2969 // tail call @bar(%struct.X* byval %A)
2970 // }
2971 return false;
2972 SDValue Ptr = Ld->getBasePtr();
2973 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2974 if (!FINode)
2975 return false;
2976 FI = FINode->getIndex();
2977 } else
2978 return false;
2979
2980 assert(FI != std::numeric_limits<int>::max());
2981 if (!MFI.isFixedObjectIndex(FI))
2982 return false;
2983 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2984}
2985
2986/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2987/// for tail call optimization. Targets which want to do tail call
2988/// optimization should implement this function.
2989bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2990 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2991 bool isCalleeStructRet, bool isCallerStructRet,
2992 const SmallVectorImpl<ISD::OutputArg> &Outs,
2993 const SmallVectorImpl<SDValue> &OutVals,
2994 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2995 const bool isIndirect) const {
2996 MachineFunction &MF = DAG.getMachineFunction();
2997 const Function &CallerF = MF.getFunction();
2998 CallingConv::ID CallerCC = CallerF.getCallingConv();
2999
3000 assert(Subtarget->supportsTailCall());
3001
3002 // Indirect tail calls cannot be optimized for Thumb1 if the args
3003 // to the call take up r0-r3. The reason is that there are no legal registers
3004 // left to hold the pointer to the function to be called.
3005 // Similarly, if the function uses return address sign and authentication,
3006 // r12 is needed to hold the PAC and is not available to hold the callee
3007 // address.
3008 if (Outs.size() >= 4 &&
3009 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
3010 if (Subtarget->isThumb1Only())
3011 return false;
3012 // Conservatively assume the function spills LR.
3014 return false;
3015 }
3016
3017 // Look for obvious safe cases to perform tail call optimization that do not
3018 // require ABI changes. This is what gcc calls sibcall.
3019
3020 // Exception-handling functions need a special set of instructions to indicate
3021 // a return to the hardware. Tail-calling another function would probably
3022 // break this.
3023 if (CallerF.hasFnAttribute("interrupt"))
3024 return false;
3025
3026 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3027 return CalleeCC == CallerCC;
3028
3029 // Also avoid sibcall optimization if either caller or callee uses struct
3030 // return semantics.
3031 if (isCalleeStructRet || isCallerStructRet)
3032 return false;
3033
3034 // Externally-defined functions with weak linkage should not be
3035 // tail-called on ARM when the OS does not support dynamic
3036 // pre-emption of symbols, as the AAELF spec requires normal calls
3037 // to undefined weak functions to be replaced with a NOP or jump to the
3038 // next instruction. The behaviour of branch instructions in this
3039 // situation (as used for tail calls) is implementation-defined, so we
3040 // cannot rely on the linker replacing the tail call with a return.
3041 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3042 const GlobalValue *GV = G->getGlobal();
3043 const Triple &TT = getTargetMachine().getTargetTriple();
3044 if (GV->hasExternalWeakLinkage() &&
3045 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3046 return false;
3047 }
3048
3049 // Check that the call results are passed in the same way.
3050 LLVMContext &C = *DAG.getContext();
3051 if (!CCState::resultsCompatible(
3052 getEffectiveCallingConv(CalleeCC, isVarArg),
3053 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3054 CCAssignFnForReturn(CalleeCC, isVarArg),
3055 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3056 return false;
3057 // The callee has to preserve all registers the caller needs to preserve.
3058 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3059 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3060 if (CalleeCC != CallerCC) {
3061 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3062 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3063 return false;
3064 }
3065
3066 // If Caller's vararg or byval argument has been split between registers and
3067 // stack, do not perform tail call, since part of the argument is in caller's
3068 // local frame.
3069 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3070 if (AFI_Caller->getArgRegsSaveSize())
3071 return false;
3072
3073 // If the callee takes no arguments then go on to check the results of the
3074 // call.
3075 if (!Outs.empty()) {
3076 // Check if stack adjustment is needed. For now, do not do this if any
3077 // argument is passed on the stack.
3078 SmallVector<CCValAssign, 16> ArgLocs;
3079 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3080 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3081 if (CCInfo.getStackSize()) {
3082 // Check if the arguments are already laid out in the right way as
3083 // the caller's fixed stack objects.
3084 MachineFrameInfo &MFI = MF.getFrameInfo();
3085 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3086 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3087 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3088 i != e;
3089 ++i, ++realArgIdx) {
3090 CCValAssign &VA = ArgLocs[i];
3091 EVT RegVT = VA.getLocVT();
3092 SDValue Arg = OutVals[realArgIdx];
3093 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3094 if (VA.getLocInfo() == CCValAssign::Indirect)
3095 return false;
3096 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3097 // f64 and vector types are split into multiple registers or
3098 // register/stack-slot combinations. The types will not match
3099 // the registers; give up on memory f64 refs until we figure
3100 // out what to do about this.
3101 if (!VA.isRegLoc())
3102 return false;
3103 if (!ArgLocs[++i].isRegLoc())
3104 return false;
3105 if (RegVT == MVT::v2f64) {
3106 if (!ArgLocs[++i].isRegLoc())
3107 return false;
3108 if (!ArgLocs[++i].isRegLoc())
3109 return false;
3110 }
3111 } else if (!VA.isRegLoc()) {
3112 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3113 MFI, MRI, TII))
3114 return false;
3115 }
3116 }
3117 }
3118
3119 const MachineRegisterInfo &MRI = MF.getRegInfo();
3120 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3121 return false;
3122 }
3123
3124 return true;
3125}
3126
3127bool
3128ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3129 MachineFunction &MF, bool isVarArg,
3130 const SmallVectorImpl<ISD::OutputArg> &Outs,
3131 LLVMContext &Context) const {
3132 SmallVector<CCValAssign, 16> RVLocs;
3133 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3134 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3135}
3136
3137 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3138 const SDLoc &DL, SelectionDAG &DAG) {
3139 const MachineFunction &MF = DAG.getMachineFunction();
3140 const Function &F = MF.getFunction();
3141
3142 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3143
3144 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3145 // version of the "preferred return address". These offsets affect the return
3146 // instruction if this is a return from PL1 without hypervisor extensions.
3147 // IRQ/FIQ: +4 "subs pc, lr, #4"
3148 // SWI: 0 "subs pc, lr, #0"
3149 // ABORT: +4 "subs pc, lr, #4"
3150 // UNDEF: +4/+2 "subs pc, lr, #0"
3151 // UNDEF varies depending on where the exception came from ARM or Thumb
3152 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3153
3154 int64_t LROffset;
3155 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3156 IntKind == "ABORT")
3157 LROffset = 4;
3158 else if (IntKind == "SWI" || IntKind == "UNDEF")
3159 LROffset = 0;
3160 else
3161 report_fatal_error("Unsupported interrupt attribute. If present, value "
3162 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3163
3164 RetOps.insert(RetOps.begin() + 1,
3165 DAG.getConstant(LROffset, DL, MVT::i32, false));
3166
3167 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3168}
3169
3170SDValue
3171ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3172 bool isVarArg,
3173 const SmallVectorImpl<ISD::OutputArg> &Outs,
3174 const SmallVectorImpl<SDValue> &OutVals,
3175 const SDLoc &dl, SelectionDAG &DAG) const {
3176 // CCValAssign - represent the assignment of the return value to a location.
3177 SmallVector<CCValAssign, 16> RVLocs;
3178
3179 // CCState - Info about the registers and stack slots.
3180 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3181 *DAG.getContext());
3182
3183 // Analyze outgoing return values.
3184 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3185
3186 SDValue Glue;
3187 SmallVector<SDValue, 4> RetOps;
3188 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3189 bool isLittleEndian = Subtarget->isLittle();
3190
3191 MachineFunction &MF = DAG.getMachineFunction();
3192 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3193 AFI->setReturnRegsCount(RVLocs.size());
3194
3195 // Report error if cmse entry function returns structure through first ptr arg.
3196 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3197 // Note: using an empty SDLoc(), as the first line of the function is a
3198 // better place to report than the last line.
3199 DiagnosticInfoUnsupported Diag(
3200 DAG.getMachineFunction().getFunction(),
3201 "secure entry function would return value through pointer",
3202 SDLoc().getDebugLoc());
3203 DAG.getContext()->diagnose(Diag);
3204 }
3205
3206 // Copy the result values into the output registers.
3207 for (unsigned i = 0, realRVLocIdx = 0;
3208 i != RVLocs.size();
3209 ++i, ++realRVLocIdx) {
3210 CCValAssign &VA = RVLocs[i];
3211 assert(VA.isRegLoc() && "Can only return in registers!");
3212
3213 SDValue Arg = OutVals[realRVLocIdx];
3214 bool ReturnF16 = false;
3215
3216 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3217 // Half-precision return values can be returned like this:
3218 //
3219 // t11 f16 = fadd ...
3220 // t12: i16 = bitcast t11
3221 // t13: i32 = zero_extend t12
3222 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3223 //
3224 // to avoid code generation for bitcasts, we simply set Arg to the node
3225 // that produces the f16 value, t11 in this case.
3226 //
3227 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3228 SDValue ZE = Arg.getOperand(0);
3229 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3230 SDValue BC = ZE.getOperand(0);
3231 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3232 Arg = BC.getOperand(0);
3233 ReturnF16 = true;
3234 }
3235 }
3236 }
3237 }
3238
3239 switch (VA.getLocInfo()) {
3240 default: llvm_unreachable("Unknown loc info!");
3241 case CCValAssign::Full: break;
3242 case CCValAssign::BCvt:
3243 if (!ReturnF16)
3244 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3245 break;
3246 }
3247
3248 // Mask f16 arguments if this is a CMSE nonsecure entry.
3249 auto RetVT = Outs[realRVLocIdx].ArgVT;
3250 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3251 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3252 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3253 } else {
3254 auto LocBits = VA.getLocVT().getSizeInBits();
3255 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3256 SDValue Mask =
3257 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3258 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3259 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3260 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3261 }
3262 }
3263
3264 if (VA.needsCustom() &&
3265 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3266 if (VA.getLocVT() == MVT::v2f64) {
3267 // Extract the first half and return it in two registers.
3268 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3269 DAG.getConstant(0, dl, MVT::i32));
3270 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3271 DAG.getVTList(MVT::i32, MVT::i32), Half);
3272
3273 Chain =
3274 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3275 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3276 Glue = Chain.getValue(1);
3277 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3278 VA = RVLocs[++i]; // skip ahead to next loc
3279 Chain =
3280 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3281 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3282 Glue = Chain.getValue(1);
3283 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3284 VA = RVLocs[++i]; // skip ahead to next loc
3285
3286 // Extract the 2nd half and fall through to handle it as an f64 value.
3287 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3288 DAG.getConstant(1, dl, MVT::i32));
3289 }
3290 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3291 // available.
3292 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3293 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3294 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3295 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3296 Glue = Chain.getValue(1);
3297 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3298 VA = RVLocs[++i]; // skip ahead to next loc
3299 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3300 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3301 } else
3302 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3303
3304 // Guarantee that all emitted copies are
3305 // stuck together, avoiding something bad.
3306 Glue = Chain.getValue(1);
3307 RetOps.push_back(DAG.getRegister(
3308 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3309 }
3310 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3311 const MCPhysReg *I =
3312 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3313 if (I) {
3314 for (; *I; ++I) {
3315 if (ARM::GPRRegClass.contains(*I))
3316 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3317 else if (ARM::DPRRegClass.contains(*I))
3319 else
3320 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3321 }
3322 }
3323
3324 // Update chain and glue.
3325 RetOps[0] = Chain;
3326 if (Glue.getNode())
3327 RetOps.push_back(Glue);
3328
3329 // CPUs which aren't M-class use a special sequence to return from
3330 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3331 // though we use "subs pc, lr, #N").
3332 //
3333 // M-class CPUs actually use a normal return sequence with a special
3334 // (hardware-provided) value in LR, so the normal code path works.
3335 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3336 !Subtarget->isMClass()) {
3337 if (Subtarget->isThumb1Only())
3338 report_fatal_error("interrupt attribute is not supported in Thumb1");
3339 return LowerInterruptReturn(RetOps, dl, DAG);
3340 }
3341
3344 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3345}
3346
3347bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
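// Helper for tail-call folding: this returns true only when the sole use of
// N's value is to be copied into the return registers (directly, via VMOVRRD
// for f64, or via a bitcast for f32); Chain is then rewritten to the chain
// feeding those copies.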
3348 if (N->getNumValues() != 1)
3349 return false;
3350 if (!N->hasNUsesOfValue(1, 0))
3351 return false;
3352
3353 SDValue TCChain = Chain;
3354 SDNode *Copy = *N->use_begin();
3355 if (Copy->getOpcode() == ISD::CopyToReg) {
3356 // If the copy has a glue operand, we conservatively assume it isn't safe to
3357 // perform a tail call.
3358 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3359 return false;
3360 TCChain = Copy->getOperand(0);
3361 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3362 SDNode *VMov = Copy;
3363 // f64 returned in a pair of GPRs.
3364 SmallPtrSet<SDNode*, 2> Copies;
3365 for (SDNode *U : VMov->uses()) {
3366 if (U->getOpcode() != ISD::CopyToReg)
3367 return false;
3368 Copies.insert(U);
3369 }
3370 if (Copies.size() > 2)
3371 return false;
3372
3373 for (SDNode *U : VMov->uses()) {
3374 SDValue UseChain = U->getOperand(0);
3375 if (Copies.count(UseChain.getNode()))
3376 // Second CopyToReg
3377 Copy = U;
3378 else {
3379 // We are at the top of this chain.
3380 // If the copy has a glue operand, we conservatively assume it
3381 // isn't safe to perform a tail call.
3382 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3383 return false;
3384 // First CopyToReg
3385 TCChain = UseChain;
3386 }
3387 }
3388 } else if (Copy->getOpcode() == ISD::BITCAST) {
3389 // f32 returned in a single GPR.
3390 if (!Copy->hasOneUse())
3391 return false;
3392 Copy = *Copy->use_begin();
3393 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3394 return false;
3395 // If the copy has a glue operand, we conservatively assume it isn't safe to
3396 // perform a tail call.
3397 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3398 return false;
3399 TCChain = Copy->getOperand(0);
3400 } else {
3401 return false;
3402 }
3403
3404 bool HasRet = false;
3405 for (const SDNode *U : Copy->uses()) {
3406 if (U->getOpcode() != ARMISD::RET_GLUE &&
3407 U->getOpcode() != ARMISD::INTRET_GLUE)
3408 return false;
3409 HasRet = true;
3410 }
3411
3412 if (!HasRet)
3413 return false;
3414
3415 Chain = TCChain;
3416 return true;
3417}
3418
3419bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3420 if (!Subtarget->supportsTailCall())
3421 return false;
3422
3423 if (!CI->isTailCall())
3424 return false;
3425
3426 return true;
3427}
3428
3429 // Writing a 64-bit value, so we need to split it into two 32-bit values first
3430 // and pass the low and high parts through.
3431 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3432 SDLoc DL(Op);
3433 SDValue WriteValue = Op->getOperand(2);
3434
3435 // This function is only supposed to be called for i64 type argument.
3436 assert(WriteValue.getValueType() == MVT::i64
3437 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3438
3439 SDValue Lo, Hi;
3440 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3441 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3442 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3443}
3444
3445// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3446// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3447// one of the above mentioned nodes. It has to be wrapped because otherwise
3448// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3449// be used to form addressing mode. These wrapped nodes will be selected
3450// into MOVi.
3451SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3452 SelectionDAG &DAG) const {
3453 EVT PtrVT = Op.getValueType();
3454 // FIXME there is no actual debug info here
3455 SDLoc dl(Op);
3456 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3457 SDValue Res;
3458
3459 // When generating execute-only code Constant Pools must be promoted to the
3460 // global data section. It's a bit ugly that we can't share them across basic
3461 // blocks, but this way we guarantee that execute-only behaves correctly with
3462 // position-independent addressing modes.
3463 if (Subtarget->genExecuteOnly()) {
3464 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3465 auto T = const_cast<Type*>(CP->getType());
3466 auto C = const_cast<Constant*>(CP->getConstVal());
3467 auto M = const_cast<Module*>(DAG.getMachineFunction().
3469 auto GV = new GlobalVariable(
3470 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3473 Twine(AFI->createPICLabelUId())
3474 );
3475 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3476 dl, PtrVT);
3477 return LowerGlobalAddress(GA, DAG);
3478 }
3479
3480 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3481 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3482 Align CPAlign = CP->getAlign();
3483 if (Subtarget->isThumb1Only())
3484 CPAlign = std::max(CPAlign, Align(4));
3485 if (CP->isMachineConstantPoolEntry())
3486 Res =
3487 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3488 else
3489 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3490 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3491}
3492
3494 // If we don't have a 32-bit pc-relative branch instruction then the jump
3495 // table consists of block addresses. Usually this is inline, but for
3496 // execute-only it must be placed out-of-line.
3497 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3500}
3501
3502SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3503 SelectionDAG &DAG) const {
3506 unsigned ARMPCLabelIndex = 0;
3507 SDLoc DL(Op);
3508 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3509 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3510 SDValue CPAddr;
3511 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3512 if (!IsPositionIndependent) {
3513 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3514 } else {
3515 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3516 ARMPCLabelIndex = AFI->createPICLabelUId();
3518 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3519 ARMCP::CPBlockAddress, PCAdj);
3520 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3521 }
3522 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3523 SDValue Result = DAG.getLoad(
3524 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3526 if (!IsPositionIndependent)
3527 return Result;
3528 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3529 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3530}
3531
3532/// Convert a TLS address reference into the correct sequence of loads
3533/// and calls to compute the variable's address for Darwin, and return an
3534/// SDValue containing the final node.
3535
3536/// Darwin only has one TLS scheme which must be capable of dealing with the
3537/// fully general situation, in the worst case. This means:
3538/// + "extern __thread" declaration.
3539/// + Defined in a possibly unknown dynamic library.
3540///
3541/// The general system is that each __thread variable has a [3 x i32] descriptor
3542/// which contains information used by the runtime to calculate the address. The
3543/// only part of this the compiler needs to know about is the first word, which
3544/// contains a function pointer that must be called with the address of the
3545/// entire descriptor in "r0".
3546///
3547/// Since this descriptor may be in a different unit, in general access must
3548/// proceed along the usual ARM rules. A common sequence to produce is:
3549///
3550/// movw rT1, :lower16:_var$non_lazy_ptr
3551/// movt rT1, :upper16:_var$non_lazy_ptr
3552/// ldr r0, [rT1]
3553/// ldr rT2, [r0]
3554/// blx rT2
3555/// [...address now in r0...]
3556SDValue
3557ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3558 SelectionDAG &DAG) const {
3559 assert(Subtarget->isTargetDarwin() &&
3560 "This function expects a Darwin target");
3561 SDLoc DL(Op);
3562
3563 // The first step is to get the address of the actual global symbol. This is where
3564 // the TLS descriptor lives.
3565 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3566
3567 // The first entry in the descriptor is a function pointer that we must call
3568 // to obtain the address of the variable.
3569 SDValue Chain = DAG.getEntryNode();
3570 SDValue FuncTLVGet = DAG.getLoad(
3571 MVT::i32, DL, Chain, DescAddr,
3575 Chain = FuncTLVGet.getValue(1);
3576
3578 MachineFrameInfo &MFI = F.getFrameInfo();
3579 MFI.setAdjustsStack(true);
3580
3581 // TLS calls preserve all registers except those that absolutely must be
3582 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3583 // silly).
3584 auto TRI =
3586 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3588
3589 // Finally, we can make the call. This is just a degenerate version of a
3590 // normal ARM call node: r0 takes the address of the descriptor, and
3591 // returns the address of the variable in this thread.
3592 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3593 Chain =
3594 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3595 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3596 DAG.getRegisterMask(Mask), Chain.getValue(1));
3597 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3598}
3599
3600SDValue
3601ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3602 SelectionDAG &DAG) const {
3603 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3604
3605 SDValue Chain = DAG.getEntryNode();
3606 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3607 SDLoc DL(Op);
3608
3609 // Load the current TEB (thread environment block)
3610 SDValue Ops[] = {Chain,
3611 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3612 DAG.getTargetConstant(15, DL, MVT::i32),
3613 DAG.getTargetConstant(0, DL, MVT::i32),
3614 DAG.getTargetConstant(13, DL, MVT::i32),
3615 DAG.getTargetConstant(0, DL, MVT::i32),
3616 DAG.getTargetConstant(2, DL, MVT::i32)};
3617 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3618 DAG.getVTList(MVT::i32, MVT::Other), Ops);
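// The operands encode "mrc p15, #0, <Rt>, c13, c0, #2", i.e. a read of the
// CP15 thread ID register (TPIDRURW), which Windows uses to hold the TEB
// pointer.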
3619
3620 SDValue TEB = CurrentTEB.getValue(0);
3621 Chain = CurrentTEB.getValue(1);
3622
3623 // Load the ThreadLocalStoragePointer from the TEB
3624 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3625 SDValue TLSArray =
3626 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3627 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3628
3629 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3630 // offset into the TLSArray.
3631
3632 // Load the TLS index from the C runtime
3633 SDValue TLSIndex =
3634 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3635 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3636 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3637
3638 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3639 DAG.getConstant(2, DL, MVT::i32));
3640 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3641 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3643
3644 // Get the offset of the start of the .tls section (section base)
3645 const auto *GA = cast<GlobalAddressSDNode>(Op);
3646 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3647 SDValue Offset = DAG.getLoad(
3648 PtrVT, DL, Chain,
3649 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3650 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3652
3653 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3654}
3655
3656// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3657SDValue
3658ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3659 SelectionDAG &DAG) const {
3660 SDLoc dl(GA);
3661 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3662 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3665 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3667 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3668 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3669 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3670 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3671 Argument = DAG.getLoad(
3672 PtrVT, dl, DAG.getEntryNode(), Argument,
3674 SDValue Chain = Argument.getValue(1);
3675
3676 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3677 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3678
3679 // call __tls_get_addr.
3681 ArgListEntry Entry;
3682 Entry.Node = Argument;
3683 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3684 Args.push_back(Entry);
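// The single i32 argument is the address of the TLSGD descriptor built above;
// __tls_get_addr returns the variable's address in r0 per the AAPCS.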
3685
3686 // FIXME: is there useful debug info available here?
3688 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3690 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3691
3692 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3693 return CallResult.first;
3694}
3695
3696// Lower ISD::GlobalTLSAddress using the "initial exec" or
3697// "local exec" model.
3698SDValue
3699ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3700 SelectionDAG &DAG,
3701 TLSModel::Model model) const {
3702 const GlobalValue *GV = GA->getGlobal();
3703 SDLoc dl(GA);
3704 SDValue Offset;
3705 SDValue Chain = DAG.getEntryNode();
3706 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3707 // Get the Thread Pointer
3708 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3709
3710 if (model == TLSModel::InitialExec) {
3711 MachineFunction &MF = DAG.getMachineFunction();
3712 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3713 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3714 // Initial exec model.
3715 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3717 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3719 true);
3720 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3721 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3722 Offset = DAG.getLoad(
3723 PtrVT, dl, Chain, Offset,
3725 Chain = Offset.getValue(1);
3726
3727 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3728 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3729
3730 Offset = DAG.getLoad(
3731 PtrVT, dl, Chain, Offset,
3733 } else {
3734 // local exec model
3735 assert(model == TLSModel::LocalExec);
3738 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3739 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3740 Offset = DAG.getLoad(
3741 PtrVT, dl, Chain, Offset,
3743 }
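// Both models then compute ThreadPointer + Offset: initial-exec needs the
// extra load above because the offset comes from the GOT at run time, while
// local-exec reads a link-time TPOFF value straight from the constant pool.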
3744
3745 // The address of the thread local variable is the add of the thread
3746 // pointer with the offset of the variable.
3747 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3748}
3749
3750SDValue
3751ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3752 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3753 if (DAG.getTarget().useEmulatedTLS())
3754 return LowerToTLSEmulatedModel(GA, DAG);
3755
3756 if (Subtarget->isTargetDarwin())
3757 return LowerGlobalTLSAddressDarwin(Op, DAG);
3758
3759 if (Subtarget->isTargetWindows())
3760 return LowerGlobalTLSAddressWindows(Op, DAG);
3761
3762 // TODO: implement the "local dynamic" model
3763 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3765
3766 switch (model) {
3769 return LowerToTLSGeneralDynamicModel(GA, DAG);
3772 return LowerToTLSExecModels(GA, DAG, model);
3773 }
3774 llvm_unreachable("bogus TLS model");
3775}
3776
3777/// Return true if all users of V are within function F, looking through
3778/// ConstantExprs.
3779static bool allUsersAreInFunction(const Value *V, const Function *F) {
3780 SmallVector<const User*,4> Worklist(V->users());
3781 while (!Worklist.empty()) {
3782 auto *U = Worklist.pop_back_val();
3783 if (isa<ConstantExpr>(U)) {
3784 append_range(Worklist, U->users());
3785 continue;
3786 }
3787
3788 auto *I = dyn_cast<Instruction>(U);
3789 if (!I || I->getParent()->getParent() != F)
3790 return false;
3791 }
3792 return true;
3793}
3794
3796 const GlobalValue *GV, SelectionDAG &DAG,
3797 EVT PtrVT, const SDLoc &dl) {
3798 // If we're creating a pool entry for a constant global with unnamed address,
3799 // and the global is small enough, we can emit it inline into the constant pool
3800 // to save ourselves an indirection.
3801 //
3802 // This is a win if the constant is only used in one function (so it doesn't
3803 // need to be duplicated) or duplicating the constant wouldn't increase code
3804 // size (implying the constant is no larger than 4 bytes).
3805 const Function &F = DAG.getMachineFunction().getFunction();
3806
3807 // We rely on this decision to inline being idempotent and unrelated to the
3808 // use-site. We know that if we inline a variable at one use site, we'll
3809 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3810 // doesn't know about this optimization, so bail out if it's enabled;
3811 // otherwise we could decide to inline here (and thus never emit the GV)
3812 // while fast-isel-generated code still requires the GV.
3815 return SDValue();
3816
3817 auto *GVar = dyn_cast<GlobalVariable>(GV);
3818 if (!GVar || !GVar->hasInitializer() ||
3819 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3820 !GVar->hasLocalLinkage())
3821 return SDValue();
3822
3823 // If we inline a value that contains relocations, we move the relocations
3824 // from .data to .text. This is not allowed in position-independent code.
3825 auto *Init = GVar->getInitializer();
3826 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3827 Init->needsDynamicRelocation())
3828 return SDValue();
3829
3830 // The constant islands pass can only really deal with alignment requests
3831 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3832 // any type wanting greater alignment requirements than 4 bytes. We also
3833 // can only promote constants that are multiples of 4 bytes in size or
3834 // are paddable to a multiple of 4. Currently we only try and pad constants
3835 // that are strings for simplicity.
3836 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3837 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3838 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3839 unsigned RequiredPadding = 4 - (Size % 4);
3840 bool PaddingPossible =
3841 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3842 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3843 Size == 0)
3844 return SDValue();
3845
3846 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
 3847 MachineFunction &MF = DAG.getMachineFunction();
 3848 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 3849
3850 // We can't bloat the constant pool too much, else the ConstantIslands pass
3851 // may fail to converge. If we haven't promoted this global yet (it may have
3852 // multiple uses), and promoting it would increase the constant pool size (Sz
3853 // > 4), ensure we have space to do so up to MaxTotal.
3854 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3855 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
 3856 ConstpoolPromotionMaxTotal)
 3857 return SDValue();
3858
3859 // This is only valid if all users are in a single function; we can't clone
3860 // the constant in general. The LLVM IR unnamed_addr allows merging
3861 // constants, but not cloning them.
3862 //
3863 // We could potentially allow cloning if we could prove all uses of the
3864 // constant in the current function don't care about the address, like
3865 // printf format strings. But that isn't implemented for now.
3866 if (!allUsersAreInFunction(GVar, &F))
3867 return SDValue();
3868
3869 // We're going to inline this global. Pad it out if needed.
3870 if (RequiredPadding != 4) {
3871 StringRef S = CDAInit->getAsString();
3872
 3873 SmallVector<uint8_t, 16> V(S.size());
 3874 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3875 while (RequiredPadding--)
3876 V.push_back(0);
 3877 Init = ConstantDataArray::get(*DAG.getContext(), V);
 3878 }
3879
3880 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3881 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3882 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
 3883 AFI->markGlobalAsPromotedToConstantPool(GVar);
 3884 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
 3885 PaddedSize - 4);
3886 }
3887 ++NumConstpoolPromoted;
3888 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3889}
3890
 3891 static bool isReadOnly(const GlobalValue *GV) {
 3892 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3893 if (!(GV = GA->getAliaseeObject()))
3894 return false;
3895 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3896 return V->isConstant();
3897 return isa<Function>(GV);
3898}
3899
3900SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3901 SelectionDAG &DAG) const {
3902 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3903 default: llvm_unreachable("unknown object format");
3904 case Triple::COFF:
3905 return LowerGlobalAddressWindows(Op, DAG);
3906 case Triple::ELF:
3907 return LowerGlobalAddressELF(Op, DAG);
3908 case Triple::MachO:
3909 return LowerGlobalAddressDarwin(Op, DAG);
3910 }
3911}
3912
3913SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3914 SelectionDAG &DAG) const {
3915 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3916 SDLoc dl(Op);
3917 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3918 bool IsRO = isReadOnly(GV);
3919
 3920 // Only call promoteToConstantPool if we are not generating an execute-only (XO) text section.
3921 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3922 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3923 return V;
3924
3925 if (isPositionIndependent()) {
 3926 SDValue G = DAG.getTargetGlobalAddress(
 3927 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3928 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3929 if (!GV->isDSOLocal())
3930 Result =
3931 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
 3932 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 3933 return Result;
3934 } else if (Subtarget->isROPI() && IsRO) {
3935 // PC-relative.
3936 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3937 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3938 return Result;
3939 } else if (Subtarget->isRWPI() && !IsRO) {
3940 // SB-relative.
3941 SDValue RelAddr;
3942 if (Subtarget->useMovt()) {
3943 ++NumMovwMovt;
3944 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3945 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3946 } else { // use literal pool for address constant
 3947 ARMConstantPoolValue *CPV =
 3948 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
 3949 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3950 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3951 RelAddr = DAG.getLoad(
3952 PtrVT, dl, DAG.getEntryNode(), CPAddr,
 3953 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3954 }
3955 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3956 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3957 return Result;
3958 }
3959
3960 // If we have T2 ops, we can materialize the address directly via movt/movw
 3961 // pair. This is always cheaper. If we need to generate Execute Only code, and we
3962 // only have Thumb1 available, we can't use a constant pool and are forced to
3963 // use immediate relocations.
3964 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3965 if (Subtarget->useMovt())
3966 ++NumMovwMovt;
3967 // FIXME: Once remat is capable of dealing with instructions with register
3968 // operands, expand this into two nodes.
3969 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3970 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3971 } else {
3972 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3973 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3974 return DAG.getLoad(
3975 PtrVT, dl, DAG.getEntryNode(), CPAddr,
 3976 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3977 }
3978}
3979
3980SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3981 SelectionDAG &DAG) const {
3982 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3983 "ROPI/RWPI not currently supported for Darwin");
3984 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3985 SDLoc dl(Op);
3986 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3987
3988 if (Subtarget->useMovt())
3989 ++NumMovwMovt;
3990
3991 // FIXME: Once remat is capable of dealing with instructions with register
3992 // operands, expand this into multiple nodes
3993 unsigned Wrapper =
 3994 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
 3995
3996 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3997 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3998
3999 if (Subtarget->isGVIndirectSymbol(GV))
4000 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
 4001 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 4002 return Result;
4003}
4004
4005SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4006 SelectionDAG &DAG) const {
4007 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4008 assert(Subtarget->useMovt() &&
4009 "Windows on ARM expects to use movw/movt");
4010 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4011 "ROPI/RWPI not currently supported for Windows");
4012
 4013 const TargetMachine &TM = getTargetMachine();
 4014 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4015 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4016 if (GV->hasDLLImportStorageClass())
4017 TargetFlags = ARMII::MO_DLLIMPORT;
4018 else if (!TM.shouldAssumeDSOLocal(GV))
4019 TargetFlags = ARMII::MO_COFFSTUB;
4020 EVT PtrVT = getPointerTy(DAG.getDataLayout());
 4021 SDValue Result;
 4022 SDLoc DL(Op);
4023
4024 ++NumMovwMovt;
4025
4026 // FIXME: Once remat is capable of dealing with instructions with register
4027 // operands, expand this into two nodes.
4028 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4029 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4030 TargetFlags));
4031 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4032 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
 4033 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 4034 return Result;
4035}
4036
4037SDValue
4038ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4039 SDLoc dl(Op);
4040 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4041 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4042 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4043 Op.getOperand(1), Val);
4044}
4045
4046SDValue
4047ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4048 SDLoc dl(Op);
4049 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4050 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4051}
4052
4053SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4054 SelectionDAG &DAG) const {
4055 SDLoc dl(Op);
4056 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4057 Op.getOperand(0));
4058}
4059
4060SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4061 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4062 unsigned IntNo =
4063 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4064 switch (IntNo) {
4065 default:
4066 return SDValue(); // Don't custom lower most intrinsics.
4067 case Intrinsic::arm_gnu_eabi_mcount: {
 4068 MachineFunction &MF = DAG.getMachineFunction();
 4069 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4070 SDLoc dl(Op);
4071 SDValue Chain = Op.getOperand(0);
4072 // call "\01__gnu_mcount_nc"
4073 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4074 const uint32_t *Mask =
 4075 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
 4076 assert(Mask && "Missing call preserved mask for calling convention");
 4077 // Mark LR as an implicit live-in.
4078 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4079 SDValue ReturnAddress =
4080 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4081 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4082 SDValue Callee =
4083 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
 4084 SDValue RegisterMask = DAG.getRegisterMask(Mask);
 4085 if (Subtarget->isThumb())
4086 return SDValue(
4087 DAG.getMachineNode(
4088 ARM::tBL_PUSHLR, dl, ResultTys,
4089 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4090 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4091 0);
4092 return SDValue(
4093 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4094 {ReturnAddress, Callee, RegisterMask, Chain}),
4095 0);
4096 }
4097 }
4098}
4099
4100SDValue
4101ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4102 const ARMSubtarget *Subtarget) const {
4103 unsigned IntNo = Op.getConstantOperandVal(0);
4104 SDLoc dl(Op);
4105 switch (IntNo) {
4106 default: return SDValue(); // Don't custom lower most intrinsics.
4107 case Intrinsic::thread_pointer: {
4108 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4109 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4110 }
4111 case Intrinsic::arm_cls: {
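// CLS (count leading sign bits) is expanded below as
//   cls(x) = ctlz((((x >>s 31) ^ x) << 1) | 1)
// where >>s is an arithmetic shift. E.g. for x = 0xF0000000 the shift gives
// 0xFFFFFFFF, the xor gives 0x0FFFFFFF, and (0x0FFFFFFF << 1) | 1 =
// 0x1FFFFFFF, whose ctlz is 3 = cls(x).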
4112 const SDValue &Operand = Op.getOperand(1);
4113 const EVT VTy = Op.getValueType();
4114 SDValue SRA =
4115 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4116 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4117 SDValue SHL =
4118 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4119 SDValue OR =
4120 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4121 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4122 return Result;
4123 }
4124 case Intrinsic::arm_cls64: {
4125 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4126 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
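// E.g. for x = 0x0000000040000000 the high word is zero, so cls(hi) == 31 and
// the result is 31 + clz(lo) = 31 + 1 = 32; for x = 0xFFFFFFFF00000000 it is
// 31 + clz(~lo) = 31 + 0 = 31.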
4127 const SDValue &Operand = Op.getOperand(1);
4128 const EVT VTy = Op.getValueType();
4129 SDValue Lo, Hi;
4130 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4131 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4132 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4133 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4134 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4135 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4136 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4137 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4138 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4139 SDValue CheckLo =
4140 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4141 SDValue HiIsZero =
4142 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4143 SDValue AdjustedLo =
4144 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4145 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4146 SDValue Result =
4147 DAG.getSelect(dl, VTy, CheckLo,
4148 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4149 return Result;
4150 }
4151 case Intrinsic::eh_sjlj_lsda: {
 4152 MachineFunction &MF = DAG.getMachineFunction();
 4153 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 4154 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4155 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4156 SDValue CPAddr;
4157 bool IsPositionIndependent = isPositionIndependent();
4158 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
 4159 ARMConstantPoolValue *CPV =
 4160 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4161 ARMCP::CPLSDA, PCAdj);
4162 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4163 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4164 SDValue Result = DAG.getLoad(
4165 PtrVT, dl, DAG.getEntryNode(), CPAddr,
 4166 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 4167
4168 if (IsPositionIndependent) {
4169 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4170 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4171 }
4172 return Result;
4173 }
4174 case Intrinsic::arm_neon_vabs:
4175 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4176 Op.getOperand(1));
4177 case Intrinsic::arm_neon_vmulls:
4178 case Intrinsic::arm_neon_vmullu: {
4179 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
 4180 ? ARMISD::VMULLs : ARMISD::VMULLu;
 4181 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4182 Op.getOperand(1), Op.getOperand(2));
4183 }
4184 case Intrinsic::arm_neon_vminnm:
4185 case Intrinsic::arm_neon_vmaxnm: {
4186 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
 4187 ? ISD::FMINNUM : ISD::FMAXNUM;
 4188 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4189 Op.getOperand(1), Op.getOperand(2));
4190 }
4191 case Intrinsic::arm_neon_vminu:
4192 case Intrinsic::arm_neon_vmaxu: {
4193 if (Op.getValueType().isFloatingPoint())
4194 return SDValue();
4195 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4196 ? ISD::UMIN : ISD::UMAX;
4197 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4198 Op.getOperand(1), Op.getOperand(2));
4199 }
4200 case Intrinsic::arm_neon_vmins:
4201 case Intrinsic::arm_neon_vmaxs: {
4202 // v{min,max}s is overloaded between signed integers and floats.
4203 if (!Op.getValueType().isFloatingPoint()) {
4204 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4205 ? ISD::SMIN : ISD::SMAX;
4206 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4207 Op.getOperand(1), Op.getOperand(2));
4208 }
4209 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
 4210 ? ISD::FMINIMUM : ISD::FMAXIMUM;
 4211 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4212 Op.getOperand(1), Op.getOperand(2));
4213 }
4214 case Intrinsic::arm_neon_vtbl1:
4215 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4216 Op.getOperand(1), Op.getOperand(2));
4217 case Intrinsic::arm_neon_vtbl2:
4218 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4219 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4220 case Intrinsic::arm_mve_pred_i2v:
4221 case Intrinsic::arm_mve_pred_v2i:
4222 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4223 Op.getOperand(1));
4224 case Intrinsic::arm_mve_vreinterpretq:
4225 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4226 Op.getOperand(1));
4227 case Intrinsic::arm_mve_lsll:
4228 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4229 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4230 case Intrinsic::arm_mve_asrl:
4231 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4232 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4233 }
4234}
4235
 4236 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
 4237 const ARMSubtarget *Subtarget) {
4238 SDLoc dl(Op);
4239 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4240 if (SSID == SyncScope::SingleThread)
4241 return Op;
4242
4243 if (!Subtarget->hasDataBarrier()) {
4244 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4245 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4246 // here.
4247 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4248 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4249 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4250 DAG.getConstant(0, dl, MVT::i32));
4251 }
4252
4253 AtomicOrdering Ord =
4254 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
 4255 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
 4256 if (Subtarget->isMClass()) {
4257 // Only a full system barrier exists in the M-class architectures.
 4258 Domain = ARM_MB::SY;
 4259 } else if (Subtarget->preferISHSTBarriers() &&
4260 Ord == AtomicOrdering::Release) {
4261 // Swift happens to implement ISHST barriers in a way that's compatible with
4262 // Release semantics but weaker than ISH so we'd be fools not to use
4263 // it. Beware: other processors probably don't!
 4264 Domain = ARM_MB::ISHST;
 4265 }
4266
4267 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4268 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4269 DAG.getConstant(Domain, dl, MVT::i32));
4270}
4271
 4272 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
 4273 const ARMSubtarget *Subtarget) {
 4274 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4275 if (!(Subtarget->isThumb2() ||
4276 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4277 // Just preserve the chain.
4278 return Op.getOperand(0);
4279
4280 SDLoc dl(Op);
4281 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4282 if (!isRead &&
4283 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4284 // ARMv7 with MP extension has PLDW.
4285 return Op.getOperand(0);
4286
4287 unsigned isData = Op.getConstantOperandVal(4);
4288 if (Subtarget->isThumb()) {
4289 // Invert the bits.
4290 isRead = ~isRead & 1;
4291 isData = ~isData & 1;
4292 }
4293
4294 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4295 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4296 DAG.getConstant(isData, dl, MVT::i32));
4297}
4298
 4299 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
 4300 MachineFunction &MF = DAG.getMachineFunction();
 4301 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4302
4303 // vastart just stores the address of the VarArgsFrameIndex slot into the
4304 // memory location argument.
4305 SDLoc dl(Op);
 4306 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
 4307 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4308 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4309 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4310 MachinePointerInfo(SV));
4311}
4312
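// Reassemble an f64 formal argument that the calling convention split into
// two i32 pieces: the first piece always arrives in a GPR, the second either
// in the next GPR or in a stack slot. The halves are recombined with VMOVDRR,
// swapping them first on big-endian targets.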
4313SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4314 CCValAssign &NextVA,
4315 SDValue &Root,
4316 SelectionDAG &DAG,
4317 const SDLoc &dl) const {
 4318 MachineFunction &MF = DAG.getMachineFunction();
 4319 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 4320
4321 const TargetRegisterClass *RC;
4322 if (AFI->isThumb1OnlyFunction())
4323 RC = &ARM::tGPRRegClass;
4324 else
4325 RC = &ARM::GPRRegClass;
4326
4327 // Transform the arguments stored in physical registers into virtual ones.
4328 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4329 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4330
4331 SDValue ArgValue2;
4332 if (NextVA.isMemLoc()) {
4333 MachineFrameInfo &MFI = MF.getFrameInfo();
4334 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4335
4336 // Create load node to retrieve arguments from the stack.
4337 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4338 ArgValue2 = DAG.getLoad(
4339 MVT::i32, dl, Root, FIN,
 4340 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
 4341 } else {
4342 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4343 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4344 }
4345 if (!Subtarget->isLittle())
4346 std::swap (ArgValue, ArgValue2);
4347 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4348}
4349
4350// The remaining GPRs hold either the beginning of variable-argument
4351// data, or the beginning of an aggregate passed by value (usually
4352// byval). Either way, we allocate stack slots adjacent to the data
4353// provided by our caller, and store the unallocated registers there.
4354// If this is a variadic function, the va_list pointer will begin with
4355// these values; otherwise, this reassembles a (byval) structure that
4356// was split between registers and memory.
 4357 // Return: The frame index the registers were stored into.
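// Illustrative example: if a 12-byte byval argument was assigned r2 and r3
// plus 4 bytes of stack, the recorded register range is [R2, R4). Below,
// ArgOffset becomes -4 * (R4 - R2) = -8, so the fixed stack object starts 8
// bytes below the caller-provided bytes, and storing r2 and r3 into it makes
// the whole aggregate contiguous in memory.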
4358int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4359 const SDLoc &dl, SDValue &Chain,
4360 const Value *OrigArg,
4361 unsigned InRegsParamRecordIdx,
4362 int ArgOffset, unsigned ArgSize) const {
 4363 // Currently, two use cases are possible:
 4364 // Case #1. Non-var-args function, and we meet the first byval parameter.
 4365 // Set up the first unallocated register as the first byval register;
 4366 // eat all remaining registers
 4367 // (these two actions are performed by the HandleByVal method).
 4368 // Then, here, we initialize the stack frame with
 4369 // "store-reg" instructions.
 4370 // Case #2. Var-args function that doesn't contain byval parameters.
 4371 // The same: eat all remaining unallocated registers and
 4372 // initialize the stack frame.
4373
 4374 MachineFunction &MF = DAG.getMachineFunction();
 4375 MachineFrameInfo &MFI = MF.getFrameInfo();
 4376 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 4377 unsigned RBegin, REnd;
4378 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4379 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4380 } else {
4381 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4382 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4383 REnd = ARM::R4;
4384 }
4385
4386 if (REnd != RBegin)
4387 ArgOffset = -4 * (ARM::R4 - RBegin);
4388
4389 auto PtrVT = getPointerTy(DAG.getDataLayout());
4390 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4391 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4392
 4393 SmallVector<SDValue, 4> MemOps;
 4394 const TargetRegisterClass *RC =
4395 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4396
4397 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4398 Register VReg = MF.addLiveIn(Reg, RC);
4399 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4400 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4401 MachinePointerInfo(OrigArg, 4 * i));
4402 MemOps.push_back(Store);
4403 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4404 }
4405
4406 if (!MemOps.empty())
4407 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4408 return FrameIndex;
4409}
4410
 4411 // Set up the stack frame that the va_list pointer will start from.
4412void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4413 const SDLoc &dl, SDValue &Chain,
4414 unsigned ArgOffset,
4415 unsigned TotalArgRegsSaveSize,
4416 bool ForceMutable) const {
 4417 MachineFunction &MF = DAG.getMachineFunction();
 4418 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 4419
4420 // Try to store any remaining integer argument regs
4421 // to their spots on the stack so that they may be loaded by dereferencing
4422 // the result of va_next.
 4423 // If there are no regs to be stored, just point the address after the last
 4424 // argument passed via the stack.
4425 int FrameIndex = StoreByValRegs(
4426 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4427 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4428 AFI->setVarArgsFrameIndex(FrameIndex);
4429}
4430
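// The two hooks below handle f16/bf16 values that the calling convention
// assigns to f32 registers: the half value lives in the low 16 bits of the
// 32-bit register, so it is moved with bitcast + any-extend (and truncate on
// the way back) rather than with a floating-point conversion.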
4431bool ARMTargetLowering::splitValueIntoRegisterParts(
4432 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4433 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4434 EVT ValueVT = Val.getValueType();
4435 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4436 unsigned ValueBits = ValueVT.getSizeInBits();
4437 unsigned PartBits = PartVT.getSizeInBits();
4438 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4439 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4440 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4441 Parts[0] = Val;
4442 return true;
4443 }
4444 return false;
4445}
4446
4447SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4448 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4449 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4450 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4451 unsigned ValueBits = ValueVT.getSizeInBits();
4452 unsigned PartBits = PartVT.getSizeInBits();
4453 SDValue Val = Parts[0];
4454
4455 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4456 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4457 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4458 return Val;
4459 }
4460 return SDValue();
4461}
4462
4463SDValue ARMTargetLowering::LowerFormalArguments(
4464 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4465 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4466 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
 4467 MachineFunction &MF = DAG.getMachineFunction();
 4468 MachineFrameInfo &MFI = MF.getFrameInfo();
4469
 4470 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 4471
4472 // Assign locations to all of the incoming arguments.
 4473 SmallVector<CCValAssign, 16> ArgLocs;
 4474 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4475 *DAG.getContext());
4476 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4477
4478 SmallVector<SDValue, 16> ArgValues;
4479 SDValue ArgValue;
 4480 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
 4481 unsigned CurArgIdx = 0;
4482
4483 // Initially ArgRegsSaveSize is zero.
4484 // Then we increase this value each time we meet byval parameter.
4485 // We also increase this value in case of varargs function.
4486 AFI->setArgRegsSaveSize(0);
4487
4488 // Calculate the amount of stack space that we need to allocate to store
4489 // byval and variadic arguments that are passed in registers.
4490 // We need to know this before we allocate the first byval or variadic
4491 // argument, as they will be allocated a stack slot below the CFA (Canonical
4492 // Frame Address, the stack pointer at entry to the function).
4493 unsigned ArgRegBegin = ARM::R4;
4494 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4495 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4496 break;
4497
4498 CCValAssign &VA = ArgLocs[i];
4499 unsigned Index = VA.getValNo();
4500 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4501 if (!Flags.isByVal())
4502 continue;
4503
4504 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4505 unsigned RBegin, REnd;
4506 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4507 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4508
4509 CCInfo.nextInRegsParam();
4510 }
4511 CCInfo.rewindByValRegsInfo();
4512
4513 int lastInsIndex = -1;
4514 if (isVarArg && MFI.hasVAStart()) {
4515 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4516 if (RegIdx != std::size(GPRArgRegs))
4517 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4518 }
4519
4520 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4521 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4522 auto PtrVT = getPointerTy(DAG.getDataLayout());
4523
4524 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4525 CCValAssign &VA = ArgLocs[i];
4526 if (Ins[VA.getValNo()].isOrigArg()) {
4527 std::advance(CurOrigArg,
4528 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4529 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4530 }
4531 // Arguments stored in registers.
4532 if (VA.isRegLoc()) {
4533 EVT RegVT = VA.getLocVT();
4534
4535 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4536 // f64 and vector types are split up into multiple registers or
4537 // combinations of registers and stack slots.
4538 SDValue ArgValue1 =
4539 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4540 VA = ArgLocs[++i]; // skip ahead to next loc
4541 SDValue ArgValue2;
4542 if (VA.isMemLoc()) {
4543 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4544 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4545 ArgValue2 = DAG.getLoad(
4546 MVT::f64, dl, Chain, FIN,
 4547 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
 4548 } else {
4549 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4550 }
4551 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4552 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4553 ArgValue1, DAG.getIntPtrConstant(0, dl));
4554 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4555 ArgValue2, DAG.getIntPtrConstant(1, dl));
4556 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4557 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4558 } else {
4559 const TargetRegisterClass *RC;
4560
4561 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4562 RC = &ARM::HPRRegClass;
4563 else if (RegVT == MVT::f32)
4564 RC = &ARM::SPRRegClass;
4565 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4566 RegVT == MVT::v4bf16)
4567 RC = &ARM::DPRRegClass;
4568 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4569 RegVT == MVT::v8bf16)
4570 RC = &ARM::QPRRegClass;
4571 else if (RegVT == MVT::i32)
4572 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4573 : &ARM::GPRRegClass;
4574 else
4575 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4576
4577 // Transform the arguments in physical registers into virtual ones.
4578 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4579 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4580
4581 // If this value is passed in r0 and has the returned attribute (e.g.
4582 // C++ 'structors), record this fact for later use.
4583 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4584 AFI->setPreservesR0();
4585 }
4586 }
4587
4588 // If this is an 8 or 16-bit value, it is really passed promoted
4589 // to 32 bits. Insert an assert[sz]ext to capture this, then
4590 // truncate to the right size.
4591 switch (VA.getLocInfo()) {
4592 default: llvm_unreachable("Unknown loc info!");
4593 case CCValAssign::Full: break;
4594 case CCValAssign::BCvt:
4595 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4596 break;
4597 case CCValAssign::SExt:
4598 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4599 DAG.getValueType(VA.getValVT()));
4600 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4601 break;
4602 case CCValAssign::ZExt:
4603 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4604 DAG.getValueType(VA.getValVT()));
4605 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4606 break;
4607 }
4608
 4609 // f16 arguments have their size extended to 4 bytes and are passed as if they
 4610 // had been copied to the LSBs of a 32-bit register.
 4611 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
4612 if (VA.needsCustom() &&
4613 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4614 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4615
4616 InVals.push_back(ArgValue);
4617 } else { // VA.isRegLoc()
4618 // Only arguments passed on the stack should make it here.
4619 assert(VA.isMemLoc());
4620 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4621
4622 int index = VA.getValNo();
4623
4624 // Some Ins[] entries become multiple ArgLoc[] entries.
4625 // Process them only once.
4626 if (index != lastInsIndex)
4627 {
4628 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4629 // FIXME: For now, all byval parameter objects are marked mutable.
4630 // This can be changed with more analysis.
 4631 // In the case of tail call optimization, mark all arguments mutable,
 4632 // since they could be overwritten by the lowering of arguments for
 4633 // a tail call.
4634 if (Flags.isByVal()) {
4635 assert(Ins[index].isOrigArg() &&
4636 "Byval arguments cannot be implicit");
4637 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4638
4639 int FrameIndex = StoreByValRegs(
4640 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4641 VA.getLocMemOffset(), Flags.getByValSize());
4642 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4643 CCInfo.nextInRegsParam();
4644 } else {
4645 unsigned FIOffset = VA.getLocMemOffset();
4646 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4647 FIOffset, true);
4648
4649 // Create load nodes to retrieve arguments from the stack.
4650 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4651 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
 4652 MachinePointerInfo::getFixedStack(
 4653 DAG.getMachineFunction(), FI)));
4654 }
4655 lastInsIndex = index;
4656 }
4657 }
4658 }
4659
4660 // varargs
4661 if (isVarArg && MFI.hasVAStart()) {
4662 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4663 TotalArgRegsSaveSize);
4664 if (AFI->isCmseNSEntryFunction()) {
4667 "secure entry function must not be variadic", dl.getDebugLoc());
4668 DAG.getContext()->diagnose(Diag);
4669 }
4670 }
4671
4672 unsigned StackArgSize = CCInfo.getStackSize();
4673 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4674 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4675 // The only way to guarantee a tail call is if the callee restores its
4676 // argument area, but it must also keep the stack aligned when doing so.
4677 const DataLayout &DL = DAG.getDataLayout();
4678 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4679
4680 AFI->setArgumentStackToRestore(StackArgSize);
4681 }
4682 AFI->setArgumentStackSize(StackArgSize);
4683
4684 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4687 "secure entry function requires arguments on stack", dl.getDebugLoc());
4688 DAG.getContext()->diagnose(Diag);
4689 }
4690
4691 return Chain;
4692}
4693
4694/// isFloatingPointZero - Return true if this is +0.0.
 4695 static bool isFloatingPointZero(SDValue Op) {
 4696 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4697 return CFP->getValueAPF().isPosZero();
4698 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4699 // Maybe this has already been legalized into the constant pool?
4700 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4701 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4702 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4703 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4704 return CFP->getValueAPF().isPosZero();
4705 }
4706 } else if (Op->getOpcode() == ISD::BITCAST &&
4707 Op->getValueType(0) == MVT::f64) {
4708 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4709 // created by LowerConstantFP().
4710 SDValue BitcastOp = Op->getOperand(0);
4711 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4712 isNullConstant(BitcastOp->getOperand(0)))
4713 return true;
4714 }
4715 return false;
4716}
4717
 4718 /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
4719/// the given operands.
4720SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4721 SDValue &ARMcc, SelectionDAG &DAG,
4722 const SDLoc &dl) const {
4723 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4724 unsigned C = RHSC->getZExtValue();
4725 if (!isLegalICmpImmediate((int32_t)C)) {
4726 // Constant does not fit, try adjusting it by one.
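// For example, "x < 0x10001" (SETLT) cannot encode 0x10001 as an ARM-mode
// modified immediate, but 0x10000 can be encoded, so the comparison is
// rewritten below as "x <= 0x10000" (SETLE).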
4727 switch (CC) {
4728 default: break;
4729 case ISD::SETLT:
4730 case ISD::SETGE:
4731 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
 4732 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
 4733 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4734 }
4735 break;
4736 case ISD::SETULT:
4737 case ISD::SETUGE:
4738 if (C != 0 && isLegalICmpImmediate(C-1)) {
 4739 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
 4740 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4741 }
4742 break;
4743 case ISD::SETLE:
4744 case ISD::SETGT:
4745 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
 4746 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
 4747 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4748 }
4749 break;
4750 case ISD::SETULE:
4751 case ISD::SETUGT:
4752 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
 4753 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
 4754 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4755 }
4756 break;
4757 }
4758 }
4759 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
 4760 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
 4761 // In ARM and Thumb-2, the compare instructions can shift their second
4762 // operand.
 4763 CC = ISD::getSetCCSwappedOperands(CC);
 4764 std::swap(LHS, RHS);
4765 }
4766
4767 // Thumb1 has very limited immediate modes, so turning an "and" into a
4768 // shift can save multiple instructions.
4769 //
4770 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4771 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4772 // own. If it's the operand to an unsigned comparison with an immediate,
4773 // we can eliminate one of the shifts: we transform
4774 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4775 //
4776 // We avoid transforming cases which aren't profitable due to encoding
4777 // details:
4778 //
4779 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4780 // would not; in that case, we're essentially trading one immediate load for
4781 // another.
4782 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4783 // 3. C2 is zero; we have other code for this special case.
4784 //
4785 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4786 // instruction, since the AND is always one instruction anyway, but we could
4787 // use narrow instructions in some cases.
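// For example, "(x & 0x7FF) == 0x400" would otherwise become
// "((x << 21) >> 21) == 0x400"; since 0x400 does not fit in an 8-bit
// immediate anyway, comparing "(x << 21) == 0x80000000" instead drops the
// second shift without making the constant any harder to materialize.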
4788 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4789 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4790 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4791 !isSignedIntSetCC(CC)) {
4792 unsigned Mask = LHS.getConstantOperandVal(1);
4793 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4794 uint64_t RHSV = RHSC->getZExtValue();
4795 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4796 unsigned ShiftBits = llvm::countl_zero(Mask);
4797 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4798 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4799 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4800 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4801 }
4802 }
4803 }
4804
4805 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4806 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4807 // way a cmp would.
4808 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4809 // some tweaks to the heuristics for the previous and->shift transform.
4810 // FIXME: Optimize cases where the LHS isn't a shift.
4811 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4812 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4813 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4814 LHS.getConstantOperandVal(1) < 31) {
4815 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4816 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4817 DAG.getVTList(MVT::i32, MVT::i32),
4818 LHS.getOperand(0),
4819 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4820 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4821 Shift.getValue(1), SDValue());
4822 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4823 return Chain.getValue(1);
4824 }
4825
 4826 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
 4827
4828 // If the RHS is a constant zero then the V (overflow) flag will never be
4829 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4830 // simpler for other passes (like the peephole optimiser) to deal with.
4831 if (isNullConstant(RHS)) {
4832 switch (CondCode) {
4833 default: break;
4834 case ARMCC::GE:
 4835 CondCode = ARMCC::PL;
 4836 break;
4837 case ARMCC::LT:
 4838 CondCode = ARMCC::MI;
 4839 break;
4840 }
4841 }
4842
4843 ARMISD::NodeType CompareType;
4844 switch (CondCode) {
4845 default:
4846 CompareType = ARMISD::CMP;
4847 break;
4848 case ARMCC::EQ:
4849 case ARMCC::NE:
4850 // Uses only Z Flag
4851 CompareType = ARMISD::CMPZ;
4852 break;
4853 }
4854 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4855 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4856}
4857
 4858 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4859SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4860 SelectionDAG &DAG, const SDLoc &dl,
4861 bool Signaling) const {
4862 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4863 SDValue Cmp;
4864 if (!isFloatingPointZero(RHS))
4865 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4866 dl, MVT::Glue, LHS, RHS);
4867 else
4868 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4869 dl, MVT::Glue, LHS);
4870 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4871}
4872
4873/// duplicateCmp - Glue values can have only one use, so this function
4874/// duplicates a comparison node.
4875SDValue
4876ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4877 unsigned Opc = Cmp.getOpcode();
4878 SDLoc DL(Cmp);
4879 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4880 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4881
4882 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4883 Cmp = Cmp.getOperand(0);
4884 Opc = Cmp.getOpcode();
4885 if (Opc == ARMISD::CMPFP)
4886 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4887 else {
4888 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4889 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4890 }
4891 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4892}
4893
4894// This function returns three things: the arithmetic computation itself
4895// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4896// comparison and the condition code define the case in which the arithmetic
4897// computation *does not* overflow.
4898std::pair<SDValue, SDValue>
4899ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4900 SDValue &ARMcc) const {
4901 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4902
4903 SDValue Value, OverflowCmp;
4904 SDValue LHS = Op.getOperand(0);
4905 SDValue RHS = Op.getOperand(1);
4906 SDLoc dl(Op);
4907
4908 // FIXME: We are currently always generating CMPs because we don't support
4909 // generating CMN through the backend. This is not as good as the natural
4910 // CMP case because it causes a register dependency and cannot be folded
4911 // later.
4912
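// For ISD::SADDO, for instance, Value = LHS + RHS and the check is
// "CMP Value, LHS": that subtraction recomputes RHS, and its V flag is set
// exactly when the addition overflowed, so ARMCC::VC selects the
// no-overflow case.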
4913 switch (Op.getOpcode()) {
4914 default:
4915 llvm_unreachable("Unknown overflow instruction!");
4916 case ISD::SADDO:
4917 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4918 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4919 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4920 break;
4921 case ISD::UADDO:
4922 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4923 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4924 // We do not use it in the USUBO case as Value may not be used.
4925 Value = DAG.getNode(ARMISD::ADDC, dl,
4926 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4927 .getValue(0);
4928 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4929 break;
4930 case ISD::SSUBO:
4931 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4932 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4933 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4934 break;
4935 case ISD::USUBO:
4936 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4937 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4938 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4939 break;
4940 case ISD::UMULO:
4941 // We generate a UMUL_LOHI and then check if the high word is 0.
4942 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4943 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4944 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4945 LHS, RHS);
4946 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4947 DAG.getConstant(0, dl, MVT::i32));
4948 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4949 break;
4950 case ISD::SMULO:
4951 // We generate a SMUL_LOHI and then check if all the bits of the high word
4952 // are the same as the sign bit of the low word.
4953 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4954 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4955 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4956 LHS, RHS);
4957 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4958 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4959 Value.getValue(0),
4960 DAG.getConstant(31, dl, MVT::i32)));
4961 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4962 break;
4963 } // switch (...)
4964
4965 return std::make_pair(Value, OverflowCmp);
4966}
4967
4968SDValue
4969ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4970 // Let legalize expand this if it isn't a legal type yet.
4971 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4972 return SDValue();
4973
4974 SDValue Value, OverflowCmp;
4975 SDValue ARMcc;
4976 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4977 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4978 SDLoc dl(Op);
4979 // We use 0 and 1 as false and true values.
4980 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4981 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4982 EVT VT = Op.getValueType();
4983
4984 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4985 ARMcc, CCR, OverflowCmp);
4986
4987 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4988 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4989}
4990
 4991 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
 4992 SelectionDAG &DAG) {
4993 SDLoc DL(BoolCarry);
4994 EVT CarryVT = BoolCarry.getValueType();
4995
4996 // This converts the boolean value carry into the carry flag by doing
4997 // ARMISD::SUBC Carry, 1
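// (For Carry == 1 the subtraction 1 - 1 does not borrow, so the ARM carry
// flag, which is the inverse of borrow for subtractions, ends up set; for
// Carry == 0 the subtraction borrows and the flag ends up clear.)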
4998 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4999 DAG.getVTList(CarryVT, MVT::i32),
5000 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5001 return Carry.getValue(1);
5002}
5003
 5004 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
 5005 SelectionDAG &DAG) {
5006 SDLoc DL(Flags);
5007
5008 // Now convert the carry flag into a boolean carry. We do this
5009 // using ARMISD:ADDE 0, 0, Carry
5010 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5011 DAG.getConstant(0, DL, MVT::i32),
5012 DAG.getConstant(0, DL, MVT::i32), Flags);
5013}
5014
5015SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5016 SelectionDAG &DAG) const {
5017 // Let legalize expand this if it isn't a legal type yet.
5018 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5019 return SDValue();
5020
5021 SDValue LHS = Op.getOperand(0);
5022 SDValue RHS = Op.getOperand(1);
5023 SDLoc dl(Op);
5024
5025 EVT VT = Op.getValueType();
5026 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5027 SDValue Value;
5028 SDValue Overflow;
5029 switch (Op.getOpcode()) {
5030 default:
5031 llvm_unreachable("Unknown overflow instruction!");
5032 case ISD::UADDO:
5033 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5034 // Convert the carry flag into a boolean value.
5035 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5036 break;
5037 case ISD::USUBO: {
5038 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5039 // Convert the carry flag into a boolean value.
5040 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5041 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5042 // value. So compute 1 - C.
5043 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5044 DAG.getConstant(1, dl, MVT::i32), Overflow);
5045 break;
5046 }
5047 }
5048
5049 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5050}
5051
 5052 static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
 5053 const ARMSubtarget *Subtarget) {
5054 EVT VT = Op.getValueType();
5055 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5056 return SDValue();
5057 if (!VT.isSimple())
5058 return SDValue();
5059
5060 unsigned NewOpcode;
5061 switch (VT.getSimpleVT().SimpleTy) {
5062 default:
5063 return SDValue();
5064 case MVT::i8:
5065 switch (Op->getOpcode()) {
5066 case ISD::UADDSAT:
5067 NewOpcode = ARMISD::UQADD8b;
5068 break;
5069 case ISD::SADDSAT:
5070 NewOpcode = ARMISD::QADD8b;
5071 break;
5072 case ISD::USUBSAT:
5073 NewOpcode = ARMISD::UQSUB8b;
5074 break;
5075 case ISD::SSUBSAT:
5076 NewOpcode = ARMISD::QSUB8b;
5077 break;
5078 }
5079 break;
5080 case MVT::i16:
5081 switch (Op->getOpcode()) {
5082 case ISD::UADDSAT:
5083 NewOpcode = ARMISD::UQADD16b;
5084 break;
5085 case ISD::SADDSAT:
5086 NewOpcode = ARMISD::QADD16b;
5087 break;
5088 case ISD::USUBSAT:
5089 NewOpcode = ARMISD::UQSUB16b;
5090 break;
5091 case ISD::SSUBSAT:
5092 NewOpcode = ARMISD::QSUB16b;
5093 break;
5094 }
5095 break;
5096 }
5097
5098 SDLoc dl(Op);
5099 SDValue Add =
5100 DAG.getNode(NewOpcode, dl, MVT::i32,
5101 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5102 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5103 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5104}
5105
5106SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5107 SDValue Cond = Op.getOperand(0);
5108 SDValue SelectTrue = Op.getOperand(1);
5109 SDValue SelectFalse = Op.getOperand(2);
5110 SDLoc dl(Op);
5111 unsigned Opc = Cond.getOpcode();
5112
5113 if (Cond.getResNo() == 1 &&
5114 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5115 Opc == ISD::USUBO)) {
5116 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5117 return SDValue();
5118
5119 SDValue Value, OverflowCmp;
5120 SDValue ARMcc;
5121 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5122 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5123 EVT VT = Op.getValueType();
5124
5125 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5126 OverflowCmp, DAG);
5127 }
5128
5129 // Convert:
5130 //
5131 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5132 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5133 //
5134 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5135 const ConstantSDNode *CMOVTrue =
5136 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5137 const ConstantSDNode *CMOVFalse =
5138 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5139
5140 if (CMOVTrue && CMOVFalse) {
5141 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5142 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5143
5144 SDValue True;
5145 SDValue False;
5146 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5147 True = SelectTrue;
5148 False = SelectFalse;
5149 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5150 True = SelectFalse;
5151 False = SelectTrue;
5152 }
5153
5154 if (True.getNode() && False.getNode()) {
5155 EVT VT = Op.getValueType();
5156 SDValue ARMcc = Cond.getOperand(2);
5157 SDValue CCR = Cond.getOperand(3);
5158 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5159 assert(True.getValueType() == VT);
5160 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5161 }
5162 }
5163 }
5164
5165 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5166 // undefined bits before doing a full-word comparison with zero.
5167 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5168 DAG.getConstant(1, dl, Cond.getValueType()));
5169
5170 return DAG.getSelectCC(dl, Cond,
5171 DAG.getConstant(0, dl, Cond.getValueType()),
5172 SelectTrue, SelectFalse, ISD::SETNE);
5173}
5174
 5175 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 5176 bool &swpCmpOps, bool &swpVselOps) {
5177 // Start by selecting the GE condition code for opcodes that return true for
5178 // 'equality'
5179 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5180 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5181 CondCode = ARMCC::GE;
5182
5183 // and GT for opcodes that return false for 'equality'.
5184 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5185 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5186 CondCode = ARMCC::GT;
5187
5188 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5189 // to swap the compare operands.
5190 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5191 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5192 swpCmpOps = true;
5193
5194 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5195 // If we have an unordered opcode, we need to swap the operands to the VSEL
5196 // instruction (effectively negating the condition).
5197 //
5198 // This also has the effect of swapping which one of 'less' or 'greater'
5199 // returns true, so we also swap the compare operands. It also switches
5200 // whether we return true for 'equality', so we compensate by picking the
5201 // opposite condition code to our original choice.
5202 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5203 CC == ISD::SETUGT) {
5204 swpCmpOps = !swpCmpOps;
5205 swpVselOps = !swpVselOps;
5206 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5207 }
5208
5209 // 'ordered' is 'anything but unordered', so use the VS condition code and
5210 // swap the VSEL operands.
5211 if (CC == ISD::SETO) {
5212 CondCode = ARMCC::VS;
5213 swpVselOps = true;
5214 }
5215
5216 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5217 // code and swap the VSEL operands. Also do this if we don't care about the
5218 // unordered case.
5219 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5220 CondCode = ARMCC::EQ;
5221 swpVselOps = true;
5222 }
5223}
5224
5225SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5226 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5227 SDValue Cmp, SelectionDAG &DAG) const {
5228 if (!Subtarget->hasFP64() && VT == MVT::f64) {
 5229 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
 5230 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
 5231 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
 5232 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5233
5234 SDValue TrueLow = TrueVal.getValue(0);
5235 SDValue TrueHigh = TrueVal.getValue(1);
5236 SDValue FalseLow = FalseVal.getValue(0);
5237 SDValue FalseHigh = FalseVal.getValue(1);
5238
5239 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5240 ARMcc, CCR, Cmp);
5241 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5242 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5243
5244 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5245 } else {
5246 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5247 Cmp);
5248 }
5249}
5250
 5251 static bool isGTorGE(ISD::CondCode CC) {
 5252 return CC == ISD::SETGT || CC == ISD::SETGE;
5253}
5254
 5255 static bool isLTorLE(ISD::CondCode CC) {
 5256 return CC == ISD::SETLT || CC == ISD::SETLE;
5257}
5258
5259// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5260// All of these conditions (and their <= and >= counterparts) will do:
5261// x < k ? k : x
5262// x > k ? x : k
5263// k < x ? x : k
5264// k > x ? k : x
5265static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5266 const SDValue TrueVal, const SDValue FalseVal,
5267 const ISD::CondCode CC, const SDValue K) {
5268 return (isGTorGE(CC) &&
5269 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5270 (isLTorLE(CC) &&
5271 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5272}
5273
5274// Check if two chained conditionals could be converted into SSAT or USAT.
5275//
5276// SSAT can replace a set of two conditional selectors that bound a number to an
5277// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5278//
5279// x < -k ? -k : (x > k ? k : x)
5280// x < -k ? -k : (x < k ? x : k)
5281// x > -k ? (x > k ? k : x) : -k
5282// x < k ? (x < -k ? -k : x) : k
5283// etc.
5284//
5285// LLVM canonicalizes these to either a min(max()) or a max(min())
5286// pattern. This function tries to match one of these and will return a SSAT
5287// node if successful.
5288//
 5289 // USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
5290// is a power of 2.
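// For example, IR that clamps x to [-128, 127] reaches here with
// Val1 = -128 and Val2 = 127: PosVal + 1 = 128 is a power of two and
// Val1 == ~Val2, so an ARMISD::SSAT node (signed saturation to 8 bits) is
// emitted. Clamping to [0, 255] instead satisfies NegVal == 0 and maps to
// ARMISD::USAT.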
 5291 static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
 5292 EVT VT = Op.getValueType();
5293 SDValue V1 = Op.getOperand(0);
5294 SDValue K1 = Op.getOperand(1);
5295 SDValue TrueVal1 = Op.getOperand(2);
5296 SDValue FalseVal1 = Op.getOperand(3);
5297 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5298
5299 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5300 if (Op2.getOpcode() != ISD::SELECT_CC)
5301 return SDValue();
5302
5303 SDValue V2 = Op2.getOperand(0);
5304 SDValue K2 = Op2.getOperand(1);
5305 SDValue TrueVal2 = Op2.getOperand(2);
5306 SDValue FalseVal2 = Op2.getOperand(3);
5307 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5308
5309 SDValue V1Tmp = V1;
5310 SDValue V2Tmp = V2;
5311
5312 // Check that the registers and the constants match a max(min()) or min(max())
5313 // pattern
5314 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5315 K2 != FalseVal2 ||
5316 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5317 return SDValue();
5318
5319 // Check that the constant in the lower-bound check is
5320 // the opposite of the constant in the upper-bound check
5321 // in 1's complement.
5322 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5323 return SDValue();
5324
5325 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5326 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5327 int64_t PosVal = std::max(Val1, Val2);
5328 int64_t NegVal = std::min(Val1, Val2);
5329
5330 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5331 !isPowerOf2_64(PosVal + 1))
5332 return SDValue();
5333
5334 // Handle the difference between USAT (unsigned) and SSAT (signed)
5335 // saturation
5336 // At this point, PosVal is guaranteed to be positive
5337 uint64_t K = PosVal;
5338 SDLoc dl(Op);
5339 if (Val1 == ~Val2)
5340 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5341 DAG.getConstant(llvm::countr_one(K), dl, VT));
5342 if (NegVal == 0)
5343 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5344 DAG.getConstant(llvm::countr_one(K), dl, VT));
5345
5346 return SDValue();
5347}
5348
5349// Check if a condition of the type x < k ? k : x can be converted into a
5350// bit operation instead of conditional moves.
5351// Currently this is allowed given:
5352// - The conditions and values match up
5353// - k is 0 or -1 (all ones)
5354// This function will not check the last condition; that's up to the caller.
5355// It returns true if the transformation can be made, and in such case
5356// returns x in V, and k in SatK.
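// For example, x < 0 ? 0 : x matches with V = x and SatK = 0; the caller can
// then replace the conditional move with x & ~(x >> 31) (see LowerSELECT_CC).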
5357static bool isLowerSaturatingConditional(SDValue Op, SDValue &V,
5358 SDValue &SatK)
5359{
5360 SDValue LHS = Op.getOperand(0);
5361 SDValue RHS = Op.getOperand(1);
5362 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5363 SDValue TrueVal = Op.getOperand(2);
5364 SDValue FalseVal = Op.getOperand(3);
5365
5366 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5367 ? &RHS
5368 : nullptr;
5369
5370 // No constant operand in the comparison, early out
5371 if (!K)
5372 return false;
5373
5374 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5375 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5376 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5377
5378 // If the constant in the comparison does not match the constant in the
5379 // select, or the variables do not match, early out
5380 if (*K != KTmp || V != VTmp)
5381 return false;
5382
5383 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5384 SatK = *K;
5385 return true;
5386 }
5387
5388 return false;
5389}
5390
5391bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5392 if (VT == MVT::f32)
5393 return !Subtarget->hasVFP2Base();
5394 if (VT == MVT::f64)
5395 return !Subtarget->hasFP64();
5396 if (VT == MVT::f16)
5397 return !Subtarget->hasFullFP16();
5398 return false;
5399}
5400
5401SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5402 EVT VT = Op.getValueType();
5403 SDLoc dl(Op);
5404
5405 // Try to convert two saturating conditional selects into a single SSAT
5406 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5407 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5408 return SatValue;
5409
5410 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5411 // into more efficient bit operations, which is possible when k is 0 or -1
5412 // On ARM and Thumb-2, which have a flexible second operand, this will result
5413 // in single instructions. On Thumb-1 the shift and the bit operation will be two
5414 // instructions.
5415 // Only allow this transformation on full-width (32-bit) operations
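 // For example: x < 0 ? 0 : x becomes x & ~(x >> 31), and x < -1 ? -1 : x
 // becomes x | (x >> 31), using an arithmetic shift of the sign bit.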
5416 SDValue LowerSatConstant;
5417 SDValue SatValue;
5418 if (VT == MVT::i32 &&
5419 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5420 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5421 DAG.getConstant(31, dl, VT));
5422 if (isNullConstant(LowerSatConstant)) {
5423 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5424 DAG.getAllOnesConstant(dl, VT));
5425 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5426 } else if (isAllOnesConstant(LowerSatConstant))
5427 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5428 }
5429
5430 SDValue LHS = Op.getOperand(0);
5431 SDValue RHS = Op.getOperand(1);
5432 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5433 SDValue TrueVal = Op.getOperand(2);
5434 SDValue FalseVal = Op.getOperand(3);
5435 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5436 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5437
5438 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5439 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5440 unsigned TVal = CTVal->getZExtValue();
5441 unsigned FVal = CFVal->getZExtValue();
5442 unsigned Opcode = 0;
5443
5444 if (TVal == ~FVal) {
5445 Opcode = ARMISD::CSINV;
5446 } else if (TVal == ~FVal + 1) {
5447 Opcode = ARMISD::CSNEG;
5448 } else if (TVal + 1 == FVal) {
5449 Opcode = ARMISD::CSINC;
5450 } else if (TVal == FVal + 1) {
5451 Opcode = ARMISD::CSINC;
5452 std::swap(TrueVal, FalseVal);
5453 std::swap(TVal, FVal);
5454 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5455 }
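 // For example, with TVal = 5: FVal = ~5 selects CSINV, FVal = -5 selects
 // CSNEG, and FVal = 6 (or 4) selects CSINC.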
5456
5457 if (Opcode) {
5458 // If one of the constants is cheaper than another, materialise the
5459 // cheaper one and let the csel generate the other.
5460 if (Opcode != ARMISD::CSINC &&
5461 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5462 std::swap(TrueVal, FalseVal);
5463 std::swap(TVal, FVal);
5464 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5465 }
5466
5467 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5468 // condition to get there. CSINC is not invertible like the other two
5469 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5470 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5471 std::swap(TrueVal, FalseVal);
5472 std::swap(TVal, FVal);
5473 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5474 }
5475
5476 // Drops F's value because we can get it by inverting/negating TVal.
5477 FalseVal = TrueVal;
5478
5479 SDValue ARMcc;
5480 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5481 EVT VT = TrueVal.getValueType();
5482 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5483 }
5484 }
5485
5486 if (isUnsupportedFloatingType(LHS.getValueType())) {
5487 DAG.getTargetLoweringInfo().softenSetCCOperands(
5488 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5489
5490 // If softenSetCCOperands only returned one value, we should compare it to
5491 // zero.
5492 if (!RHS.getNode()) {
5493 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5494 CC = ISD::SETNE;
5495 }
5496 }
5497
5498 if (LHS.getValueType() == MVT::i32) {
5499 // Try to generate VSEL on ARMv8.
5500 // The VSEL instruction can't use all the usual ARM condition
5501 // codes: it only has two bits to select the condition code, so it's
5502 // constrained to use only GE, GT, VS and EQ.
5503 //
5504 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5505 // swap the operands of the previous compare instruction (effectively
5506 // inverting the compare condition, swapping 'less' and 'greater') and
5507 // sometimes need to swap the operands to the VSEL (which inverts the
5508 // condition in the sense of firing whenever the previous condition didn't)
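 // For example, if the condition maps to LT it is inverted to GE and
 // TrueVal/FalseVal are swapped so the VSEL still selects the right value.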
5509 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5510 TrueVal.getValueType() == MVT::f32 ||
5511 TrueVal.getValueType() == MVT::f64)) {
5512 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5513 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5514 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5515 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5516 std::swap(TrueVal, FalseVal);
5517 }
5518 }
5519
5520 SDValue ARMcc;
5521 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5522 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5523 // Choose GE over PL, which vsel does not support
5524 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5525 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5526 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5527 }
5528
5529 ARMCC::CondCodes CondCode, CondCode2;
5530 FPCCToARMCC(CC, CondCode, CondCode2);
5531
5532 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5533 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5534 // must use VSEL (limited condition codes), due to not having conditional f16
5535 // moves.
5536 if (Subtarget->hasFPARMv8Base() &&
5537 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5538 (TrueVal.getValueType() == MVT::f16 ||
5539 TrueVal.getValueType() == MVT::f32 ||
5540 TrueVal.getValueType() == MVT::f64)) {
5541 bool swpCmpOps = false;
5542 bool swpVselOps = false;
5543 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5544
5545 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5546 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5547 if (swpCmpOps)
5548 std::swap(LHS, RHS);
5549 if (swpVselOps)
5550 std::swap(TrueVal, FalseVal);
5551 }
5552 }
5553
5554 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5555 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5556 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5557 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5558 if (CondCode2 != ARMCC::AL) {
5559 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5560 // FIXME: Needs another CMP because flag can have but one use.
5561 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5562 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5563 }
5564 return Result;
5565}
5566
5567/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5568/// to morph to an integer compare sequence.
5569static bool canChangeToInt(SDValue Op, bool &SeenZero,
5570 const ARMSubtarget *Subtarget) {
5571 SDNode *N = Op.getNode();
5572 if (!N->hasOneUse())
5573 // Otherwise it requires moving the value from fp to integer registers.
5574 return false;
5575 if (!N->getNumValues())
5576 return false;
5577 EVT VT = Op.getValueType();
5578 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5579 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5580 // vmrs are very slow, e.g. cortex-a8.
5581 return false;
5582
5583 if (isFloatingPointZero(Op)) {
5584 SeenZero = true;
5585 return true;
5586 }
5587 return ISD::isNormalLoad(N);
5588}
5589
5590static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5591 if (isFloatingPointZero(Op))
5592 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5593
5594 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5595 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5596 Ld->getPointerInfo(), Ld->getAlign(),
5597 Ld->getMemOperand()->getFlags());
5598
5599 llvm_unreachable("Unknown VFP cmp argument!");
5600}
5601
5602static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5603 SDValue &RetVal1, SDValue &RetVal2) {
5604 SDLoc dl(Op);
5605
5606 if (isFloatingPointZero(Op)) {
5607 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5608 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5609 return;
5610 }
5611
5612 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5613 SDValue Ptr = Ld->getBasePtr();
5614 RetVal1 =
5615 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5616 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5617
5618 EVT PtrType = Ptr.getValueType();
5619 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5620 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5621 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5622 Ld->getPointerInfo().getWithOffset(4),
5623 commonAlignment(Ld->getAlign(), 4),
5624 Ld->getMemOperand()->getFlags());
5625 return;
5626 }
5627
5628 llvm_unreachable("Unknown VFP cmp argument!");
5629}
5630
5631/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5632/// f32 and even f64 comparisons to integer ones.
5633SDValue
5634ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5635 SDValue Chain = Op.getOperand(0);
5636 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5637 SDValue LHS = Op.getOperand(2);
5638 SDValue RHS = Op.getOperand(3);
5639 SDValue Dest = Op.getOperand(4);
5640 SDLoc dl(Op);
5641
5642 bool LHSSeenZero = false;
5643 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5644 bool RHSSeenZero = false;
5645 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5646 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5647 // If unsafe fp math optimization is enabled and there are no other uses of
5648 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5649 // to an integer comparison.
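 // Masking off the sign bits is safe here because one operand is known to be
 // +/-0.0, so the integer compare still treats +0.0 and -0.0 as equal.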
5650 if (CC == ISD::SETOEQ)
5651 CC = ISD::SETEQ;
5652 else if (CC == ISD::SETUNE)
5653 CC = ISD::SETNE;
5654
5655 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5656 SDValue ARMcc;
5657 if (LHS.getValueType() == MVT::f32) {
5658 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5659 bitcastf32Toi32(LHS, DAG), Mask);
5660 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5661 bitcastf32Toi32(RHS, DAG), Mask);
5662 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5663 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5664 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5665 Chain, Dest, ARMcc, CCR, Cmp);
5666 }
5667
5668 SDValue LHS1, LHS2;
5669 SDValue RHS1, RHS2;
5670 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5671 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5672 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5673 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5674 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5675 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5676 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5677 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5678 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5679 }
5680
5681 return SDValue();
5682}
5683
5684SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5685 SDValue Chain = Op.getOperand(0);
5686 SDValue Cond = Op.getOperand(1);
5687 SDValue Dest = Op.getOperand(2);
5688 SDLoc dl(Op);
5689
5690 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5691 // instruction.
5692 unsigned Opc = Cond.getOpcode();
5693 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5694 !Subtarget->isThumb1Only();
5695 if (Cond.getResNo() == 1 &&
5696 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5697 Opc == ISD::USUBO || OptimizeMul)) {
5698 // Only lower legal XALUO ops.
5699 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5700 return SDValue();
5701
5702 // The actual operation with overflow check.
5703 SDValue Value, OverflowCmp;
5704 SDValue ARMcc;
5705 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5706
5707 // Reverse the condition code.
5708 ARMCC::CondCodes CondCode =
5709 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5710 CondCode = ARMCC::getOppositeCondition(CondCode);
5711 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5712 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5713
5714 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5715 OverflowCmp);
5716 }
5717
5718 return SDValue();
5719}
5720
5721SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5722 SDValue Chain = Op.getOperand(0);
5723 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5724 SDValue LHS = Op.getOperand(2);
5725 SDValue RHS = Op.getOperand(3);
5726 SDValue Dest = Op.getOperand(4);
5727 SDLoc dl(Op);
5728
5729 if (isUnsupportedFloatingType(LHS.getValueType())) {
5730 DAG.getTargetLoweringInfo().softenSetCCOperands(
5731 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5732
5733 // If softenSetCCOperands only returned one value, we should compare it to
5734 // zero.
5735 if (!RHS.getNode()) {
5736 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5737 CC = ISD::SETNE;
5738 }
5739 }
5740
5741 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5742 // instruction.
5743 unsigned Opc = LHS.getOpcode();
5744 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5745 !Subtarget->isThumb1Only();
5746 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5747 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5748 Opc == ISD::USUBO || OptimizeMul) &&
5749 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5750 // Only lower legal XALUO ops.
5751 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5752 return SDValue();
5753
5754 // The actual operation with overflow check.
5755 SDValue Value, OverflowCmp;
5756 SDValue ARMcc;
5757 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5758
5759 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5760 // Reverse the condition code.
5761 ARMCC::CondCodes CondCode =
5762 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5763 CondCode = ARMCC::getOppositeCondition(CondCode);
5764 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5765 }
5766 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5767
5768 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5769 OverflowCmp);
5770 }
5771
5772 if (LHS.getValueType() == MVT::i32) {
5773 SDValue ARMcc;
5774 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5775 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5776 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5777 Chain, Dest, ARMcc, CCR, Cmp);
5778 }
5779
5780 if (getTargetMachine().Options.UnsafeFPMath &&
5781 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5782 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5783 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5784 return Result;
5785 }
5786
5787 ARMCC::CondCodes CondCode, CondCode2;
5788 FPCCToARMCC(CC, CondCode, CondCode2);
5789
5790 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5791 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5792 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5793 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5794 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5795 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5796 if (CondCode2 != ARMCC::AL) {
5797 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5798 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5799 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5800 }
5801 return Res;
5802}
5803
5804SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5805 SDValue Chain = Op.getOperand(0);
5806 SDValue Table = Op.getOperand(1);
5807 SDValue Index = Op.getOperand(2);
5808 SDLoc dl(Op);
5809
5810 EVT PTy = getPointerTy(DAG.getDataLayout());
5811 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5812 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5813 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5814 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5815 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5816 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5817 // Thumb2 and ARMv8-M use a two-level jump. That is, the code jumps into the
5818 // jump table, which does another jump to the destination. This also makes it
5819 // easier to translate it to TBB / TBH later (Thumb2 only).
5820 // FIXME: This might not work if the function is extremely large.
5821 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5822 Addr, Op.getOperand(2), JTI);
5823 }
5824 if (isPositionIndependent() || Subtarget->isROPI()) {
5825 Addr =
5826 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5827 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5828 Chain = Addr.getValue(1);
5829 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5830 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5831 } else {
5832 Addr =
5833 DAG.getLoad(PTy, dl, Chain, Addr,
5834 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5835 Chain = Addr.getValue(1);
5836 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5837 }
5838}
5839
5840static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5841 EVT VT = Op.getValueType();
5842 SDLoc dl(Op);
5843
5844 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5845 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5846 return Op;
5847 return DAG.UnrollVectorOp(Op.getNode());
5848 }
5849
5850 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5851
5852 EVT NewTy;
5853 const EVT OpTy = Op.getOperand(0).getValueType();
5854 if (OpTy == MVT::v4f32)
5855 NewTy = MVT::v4i32;
5856 else if (OpTy == MVT::v4f16 && HasFullFP16)
5857 NewTy = MVT::v4i16;
5858 else if (OpTy == MVT::v8f16 && HasFullFP16)
5859 NewTy = MVT::v8i16;
5860 else
5861 llvm_unreachable("Invalid type for custom lowering!");
5862
5863 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5864 return DAG.UnrollVectorOp(Op.getNode());
5865
5866 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5867 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5868}
5869
5870SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5871 EVT VT = Op.getValueType();
5872 if (VT.isVector())
5873 return LowerVectorFP_TO_INT(Op, DAG);
5874
5875 bool IsStrict = Op->isStrictFPOpcode();
5876 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5877
5878 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5879 RTLIB::Libcall LC;
5880 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5881 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5882 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5883 Op.getValueType());
5884 else
5885 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5886 Op.getValueType());
5887 SDLoc Loc(Op);
5888 MakeLibCallOptions CallOptions;
5889 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5890 SDValue Result;
5891 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5892 CallOptions, Loc, Chain);
5893 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5894 }
5895
5896 // FIXME: Remove this when we have strict fp instruction selection patterns
5897 if (IsStrict) {
5898 SDLoc Loc(Op);
5899 SDValue Result =
5900 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5901 : ISD::FP_TO_UINT,
5902 Loc, Op.getValueType(), SrcVal);
5903 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5904 }
5905
5906 return Op;
5907}
5908
5909static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5910 const ARMSubtarget *Subtarget) {
5911 EVT VT = Op.getValueType();
5912 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5913 EVT FromVT = Op.getOperand(0).getValueType();
5914
5915 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5916 return Op;
5917 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5918 Subtarget->hasFP64())
5919 return Op;
5920 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5921 Subtarget->hasFullFP16())
5922 return Op;
5923 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5924 Subtarget->hasMVEFloatOps())
5925 return Op;
5926 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5927 Subtarget->hasMVEFloatOps())
5928 return Op;
5929
5930 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5931 return SDValue();
5932
5933 SDLoc DL(Op);
5934 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5935 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5936 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5937 DAG.getValueType(VT.getScalarType()));
5938 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5939 DAG.getConstant((1 << BW) - 1, DL, VT));
5940 if (IsSigned)
5941 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5942 DAG.getConstant(-(1 << BW), DL, VT));
5943 return Max;
5944}
5945
5946static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5947 EVT VT = Op.getValueType();
5948 SDLoc dl(Op);
5949
5950 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5951 if (VT.getVectorElementType() == MVT::f32)
5952 return Op;
5953 return DAG.UnrollVectorOp(Op.getNode());
5954 }
5955
5956 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5957 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5958 "Invalid type for custom lowering!");
5959
5960 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5961
5962 EVT DestVecType;
5963 if (VT == MVT::v4f32)
5964 DestVecType = MVT::v4i32;
5965 else if (VT == MVT::v4f16 && HasFullFP16)
5966 DestVecType = MVT::v4i16;
5967 else if (VT == MVT::v8f16 && HasFullFP16)
5968 DestVecType = MVT::v8i16;
5969 else
5970 return DAG.UnrollVectorOp(Op.getNode());
5971
5972 unsigned CastOpc;
5973 unsigned Opc;
5974 switch (Op.getOpcode()) {
5975 default: llvm_unreachable("Invalid opcode!");
5976 case ISD::SINT_TO_FP:
5977 CastOpc = ISD::SIGN_EXTEND;
5978 Opc = ISD::SINT_TO_FP;
5979 break;
5980 case ISD::UINT_TO_FP:
5981 CastOpc = ISD::ZERO_EXTEND;
5982 Opc = ISD::UINT_TO_FP;
5983 break;
5984 }
5985
5986 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5987 return DAG.getNode(Opc, dl, VT, Op);
5988}
5989
5990SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5991 EVT VT = Op.getValueType();
5992 if (VT.isVector())
5993 return LowerVectorINT_TO_FP(Op, DAG);
5994 if (isUnsupportedFloatingType(VT)) {
5995 RTLIB::Libcall LC;
5996 if (Op.getOpcode() == ISD::SINT_TO_FP)
5997 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5998 Op.getValueType());
5999 else
6000 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6001 Op.getValueType());
6002 MakeLibCallOptions CallOptions;
6003 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6004 CallOptions, SDLoc(Op)).first;
6005 }
6006
6007 return Op;
6008}
6009
6010SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6011 // Implement fcopysign with a fabs and a conditional fneg.
6012 SDValue Tmp0 = Op.getOperand(0);
6013 SDValue Tmp1 = Op.getOperand(1);
6014 SDLoc dl(Op);
6015 EVT VT = Op.getValueType();
6016 EVT SrcVT = Tmp1.getValueType();
6017 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6018 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6019 bool UseNEON = !InGPR && Subtarget->hasNEON();
6020
6021 if (UseNEON) {
6022 // Use VBSL to copy the sign bit.
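 // The mask below is a splat of the sign bit (0x80000000, shifted up to bit 63
 // for f64); the result takes the sign bit from Tmp1 and all other bits from Tmp0.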
6023 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6024 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6025 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6026 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6027 if (VT == MVT::f64)
6028 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6029 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6030 DAG.getConstant(32, dl, MVT::i32));
6031 else /*if (VT == MVT::f32)*/
6032 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6033 if (SrcVT == MVT::f32) {
6034 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6035 if (VT == MVT::f64)
6036 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6037 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6038 DAG.getConstant(32, dl, MVT::i32));
6039 } else if (VT == MVT::f32)
6040 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6041 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6042 DAG.getConstant(32, dl, MVT::i32));
6043 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6044 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6045
6046 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6047 dl, MVT::i32);
6048 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6049 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6050 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6051
6052 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6053 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6054 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6055 if (VT == MVT::f32) {
6056 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6057 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6058 DAG.getConstant(0, dl, MVT::i32));
6059 } else {
6060 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6061 }
6062
6063 return Res;
6064 }
6065
6066 // Bitcast operand 1 to i32.
6067 if (SrcVT == MVT::f64)
6068 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6069 Tmp1).getValue(1);
6070 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6071
6072 // Or in the signbit with integer operations.
6073 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6074 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6075 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6076 if (VT == MVT::f32) {
6077 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6078 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6079 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6080 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6081 }
6082
6083 // f64: Or the high part with signbit and then combine two parts.
6084 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6085 Tmp0);
6086 SDValue Lo = Tmp0.getValue(0);
6087 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6088 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6089 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6090}
6091
6092SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6093 MachineFunction &MF = DAG.getMachineFunction();
6094 MachineFrameInfo &MFI = MF.getFrameInfo();
6095 MFI.setReturnAddressIsTaken(true);
6096
6097 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6098 return SDValue();
6099
6100 EVT VT = Op.getValueType();
6101 SDLoc dl(Op);
6102 unsigned Depth = Op.getConstantOperandVal(0);
6103 if (Depth) {
6104 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6105 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6106 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6107 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6108 MachinePointerInfo());
6109 }
6110
6111 // Return LR, which contains the return address. Mark it an implicit live-in.
6112 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6113 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6114}
6115
6116SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6117 const ARMBaseRegisterInfo &ARI =
6118 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6119 MachineFunction &MF = DAG.getMachineFunction();
6120 MachineFrameInfo &MFI = MF.getFrameInfo();
6121 MFI.setFrameAddressIsTaken(true);
6122
6123 EVT VT = Op.getValueType();
6124 SDLoc dl(Op); // FIXME probably not meaningful
6125 unsigned Depth = Op.getConstantOperandVal(0);
6126 Register FrameReg = ARI.getFrameRegister(MF);
6127 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6128 while (Depth--)
6129 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6130 MachinePointerInfo());
6131 return FrameAddr;
6132}
6133
6134// FIXME? Maybe this could be a TableGen attribute on some registers and
6135// this table could be generated automatically from RegInfo.
6136Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6137 const MachineFunction &MF) const {
6138 Register Reg = StringSwitch<unsigned>(RegName)
6139 .Case("sp", ARM::SP)
6140 .Default(0);
6141 if (Reg)
6142 return Reg;
6143 report_fatal_error(Twine("Invalid register name \""
6144 + StringRef(RegName) + "\"."));
6145}
6146
6147// The result is a 64-bit value, so split it into two 32-bit values and return
6148// them as a pair of values.
6149static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6150 SelectionDAG &DAG) {
6151 SDLoc DL(N);
6152
6153 // This function is only supposed to be called for i64 type destination.
6154 assert(N->getValueType(0) == MVT::i64
6155 && "ExpandREAD_REGISTER called for non-i64 type result.");
6156
6157 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6158 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6159 N->getOperand(0),
6160 N->getOperand(1));
6161
6162 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6163 Read.getValue(1)));
6164 Results.push_back(Read.getOperand(0));
6165}
6166
6167/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6168/// When \p DstVT, the destination type of \p BC, is on the vector
6169/// register bank and the source of bitcast, \p Op, operates on the same bank,
6170/// it might be possible to combine them, such that everything stays on the
6171/// vector register bank.
6172/// \p return The node that would replace \p BT, if the combine
6173/// is possible.
6174static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6175 SelectionDAG &DAG) {
6176 SDValue Op = BC->getOperand(0);
6177 EVT DstVT = BC->getValueType(0);
6178
6179 // The only vector instruction that can produce a scalar (remember,
6180 // since the bitcast was about to be turned into VMOVDRR, the source
6181 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6182 // Moreover, we can do this combine only if there is one use.
6183 // Finally, if the destination type is not a vector, there is not
6184 // much point on forcing everything on the vector bank.
6185 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6186 !Op.hasOneUse())
6187 return SDValue();
6188
6189 // If the index is not constant, we will introduce an additional
6190 // multiply that will stick.
6191 // Give up in that case.
6192 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6193 if (!Index)
6194 return SDValue();
6195 unsigned DstNumElt = DstVT.getVectorNumElements();
6196
6197 // Compute the new index.
6198 const APInt &APIntIndex = Index->getAPIntValue();
6199 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6200 NewIndex *= APIntIndex;
6201 // Check if the new constant index fits into i32.
6202 if (NewIndex.getBitWidth() > 32)
6203 return SDValue();
6204
6205 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6206 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6207 SDLoc dl(Op);
6208 SDValue ExtractSrc = Op.getOperand(0);
6209 EVT VecVT = EVT::getVectorVT(
6210 *DAG.getContext(), DstVT.getScalarType(),
6211 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6212 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6213 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6214 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6215}
6216
6217/// ExpandBITCAST - If the target supports VFP, this function is called to
6218/// expand a bit convert where either the source or destination type is i64 to
6219/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6220/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6221/// vectors), since the legalizer won't know what to do with that.
6222SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6223 const ARMSubtarget *Subtarget) const {
6224 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6225 SDLoc dl(N);
6226 SDValue Op = N->getOperand(0);
6227
6228 // This function is only supposed to be called for i16 and i64 types, either
6229 // as the source or destination of the bit convert.
6230 EVT SrcVT = Op.getValueType();
6231 EVT DstVT = N->getValueType(0);
6232
6233 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6234 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6235 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6236 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6237
6238 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6239 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6240 return DAG.getNode(
6241 ISD::TRUNCATE, SDLoc(N), DstVT,
6242 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6243
6244 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6245 return SDValue();
6246
6247 // Turn i64->f64 into VMOVDRR.
6248 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6249 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6250 // if we can combine the bitcast with its source.
6251 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6252 return Val;
6253 SDValue Lo, Hi;
6254 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6255 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6256 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6257 }
6258
6259 // Turn f64->i64 into VMOVRRD.
6260 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6261 SDValue Cvt;
6262 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6263 SrcVT.getVectorNumElements() > 1)
6264 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6265 DAG.getVTList(MVT::i32, MVT::i32),
6266 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6267 else
6268 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6269 DAG.getVTList(MVT::i32, MVT::i32), Op);
6270 // Merge the pieces into a single i64 value.
6271 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6272 }
6273
6274 return SDValue();
6275}
6276
6277/// getZeroVector - Returns a vector of specified type with all zero elements.
6278/// Zero vectors are used to represent vector negation and in those cases
6279/// will be implemented with the NEON VNEG instruction. However, VNEG does
6280/// not support i64 elements, so sometimes the zero vectors will need to be
6281/// explicitly constructed. Regardless, use a canonical VMOV to create the
6282/// zero vector.
6283static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6284 assert(VT.isVector() && "Expected a vector type");
6285 // The canonical modified immediate encoding of a zero vector is....0!
6286 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6287 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6288 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6289 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6290}
6291
6292/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6293/// i32 values and take a 2 x i32 value to shift plus a shift amount.
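/// For a shift amount < 32: Lo = (Lo >>u Amt) | (Hi << (32 - Amt)) and
/// Hi = Hi >> Amt (arithmetic for SRA, logical for SRL). For Amt >= 32:
/// Lo = Hi >> (Amt - 32) and Hi is the sign-fill (SRA) or zero (SRL).
/// The CMOVs below select between the two cases at run time.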
6294SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6295 SelectionDAG &DAG) const {
6296 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6297 EVT VT = Op.getValueType();
6298 unsigned VTBits = VT.getSizeInBits();
6299 SDLoc dl(Op);
6300 SDValue ShOpLo = Op.getOperand(0);
6301 SDValue ShOpHi = Op.getOperand(1);
6302 SDValue ShAmt = Op.getOperand(2);
6303 SDValue ARMcc;
6304 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6305 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6306
6307 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6308
6309 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6310 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6311 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6312 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6313 DAG.getConstant(VTBits, dl, MVT::i32));
6314 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6315 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6316 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6317 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6318 ISD::SETGE, ARMcc, DAG, dl);
6319 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6320 ARMcc, CCR, CmpLo);
6321
6322 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6323 SDValue HiBigShift = Opc == ISD::SRA
6324 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6325 DAG.getConstant(VTBits - 1, dl, VT))
6326 : DAG.getConstant(0, dl, VT);
6327 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6328 ISD::SETGE, ARMcc, DAG, dl);
6329 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6330 ARMcc, CCR, CmpHi);
6331
6332 SDValue Ops[2] = { Lo, Hi };
6333 return DAG.getMergeValues(Ops, dl);
6334}
6335
6336/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6337/// i32 values and take a 2 x i32 value to shift plus a shift amount.
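/// For a shift amount < 32: Hi = (Hi << Amt) | (Lo >>u (32 - Amt)) and
/// Lo = Lo << Amt. For Amt >= 32: Hi = Lo << (Amt - 32) and Lo = 0.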
6338SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6339 SelectionDAG &DAG) const {
6340 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6341 EVT VT = Op.getValueType();
6342 unsigned VTBits = VT.getSizeInBits();
6343 SDLoc dl(Op);
6344 SDValue ShOpLo = Op.getOperand(0);
6345 SDValue ShOpHi = Op.getOperand(1);
6346 SDValue ShAmt = Op.getOperand(2);
6347 SDValue ARMcc;
6348 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6349
6350 assert(Op.getOpcode() == ISD::SHL_PARTS);
6351 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6352 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6353 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6354 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6355 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6356
6357 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6358 DAG.getConstant(VTBits, dl, MVT::i32));
6359 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6360 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6361 ISD::SETGE, ARMcc, DAG, dl);
6362 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6363 ARMcc, CCR, CmpHi);
6364
6365 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6366 ISD::SETGE, ARMcc, DAG, dl);
6367 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6368 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6369 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6370
6371 SDValue Ops[2] = { Lo, Hi };
6372 return DAG.getMergeValues(Ops, dl);
6373}
6374
6375SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6376 SelectionDAG &DAG) const {
6377 // The rounding mode is in bits 23:22 of the FPSCR.
6378 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6379// The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
6380 // so that the shift + and get folded into a bitfield extract.
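// For example, round-towards-zero is FPSCR[23:22] = 0b11, and
// ((3 + 1) & 3) == 0, the FLT_ROUNDS encoding for round-towards-zero.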
6381 SDLoc dl(Op);
6382 SDValue Chain = Op.getOperand(0);
6383 SDValue Ops[] = {Chain,
6384 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6385
6386 SDValue FPSCR =
6387 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6388 Chain = FPSCR.getValue(1);
6389 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6390 DAG.getConstant(1U << 22, dl, MVT::i32));
6391 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6392 DAG.getConstant(22, dl, MVT::i32));
6393 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6394 DAG.getConstant(3, dl, MVT::i32));
6395 return DAG.getMergeValues({And, Chain}, dl);
6396}
6397
6398SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6399 SelectionDAG &DAG) const {
6400 SDLoc DL(Op);
6401 SDValue Chain = Op->getOperand(0);
6402 SDValue RMValue = Op->getOperand(1);
6403
6404 // The rounding mode is in bits 23:22 of the FPSCR.
6405 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6406 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6407// (((arg - 1) & 3) << 22).
6408 //
6409 // It is expected that the argument of llvm.set.rounding is within the
6410// range [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6411// responsibility of the code that generates llvm.set.rounding to ensure this
6412// condition.
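// For example, llvm.set.rounding(0) (round towards zero) computes
// ((0 - 1) & 3) == 3, i.e. FPSCR[23:22] = 0b11, the ARM towards-zero mode.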
6413
6414 // Calculate new value of FPSCR[23:22].
6415 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6416 DAG.getConstant(1, DL, MVT::i32));
6417 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6418 DAG.getConstant(0x3, DL, MVT::i32));
6419 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6420 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6421
6422 // Get current value of FPSCR.
6423 SDValue Ops[] = {Chain,
6424 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6425 SDValue FPSCR =
6426 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6427 Chain = FPSCR.getValue(1);
6428 FPSCR = FPSCR.getValue(0);
6429
6430 // Put new rounding mode into FPSCR[23:22].
6431 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6432 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6433 DAG.getConstant(RMMask, DL, MVT::i32));
6434 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6435 SDValue Ops2[] = {
6436 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6437 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6438}
6439
6440SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6441 SelectionDAG &DAG) const {
6442 SDLoc DL(Op);
6443 SDValue Chain = Op->getOperand(0);
6444 SDValue Mode = Op->getOperand(1);
6445
6446 // Generate nodes to build:
6447 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6448 SDValue Ops[] = {Chain,
6449 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6450 SDValue FPSCR =
6451 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6452 Chain = FPSCR.getValue(1);
6453 FPSCR = FPSCR.getValue(0);
6454
6455 SDValue FPSCRMasked =
6456 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6457 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6458 SDValue InputMasked =
6459 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6460 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6461 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6462
6463 SDValue Ops2[] = {
6464 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6465 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6466}
6467
6468SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6469 SelectionDAG &DAG) const {
6470 SDLoc DL(Op);
6471 SDValue Chain = Op->getOperand(0);
6472
6473 // To get the default FP mode all control bits are cleared:
6474 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6475 SDValue Ops[] = {Chain,
6476 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6477 SDValue FPSCR =
6478 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6479 Chain = FPSCR.getValue(1);
6480 FPSCR = FPSCR.getValue(0);
6481
6482 SDValue FPSCRMasked = DAG.getNode(
6483 ISD::AND, DL, MVT::i32, FPSCR,
6484 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6485 SDValue Ops2[] = {Chain,
6486 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6487 FPSCRMasked};
6488 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6489}
6490
6491static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6492 const ARMSubtarget *ST) {
6493 SDLoc dl(N);
6494 EVT VT = N->getValueType(0);
6495 if (VT.isVector() && ST->hasNEON()) {
6496
6497 // Compute the least significant set bit: LSB = X & -X
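 // For example, x = 0b0110'1000 gives x & -x = 0b0000'1000, so
 // ctpop(lsb - 1) = ctpop(0b0000'0111) = 3 = cttz(x).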
6498 SDValue X = N->getOperand(0);
6499 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6500 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6501
6502 EVT ElemTy = VT.getVectorElementType();
6503
6504 if (ElemTy == MVT::i8) {
6505 // Compute with: cttz(x) = ctpop(lsb - 1)
6506 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6507 DAG.getTargetConstant(1, dl, ElemTy));
6508 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6509 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6510 }
6511
6512 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6513 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6514 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6515 unsigned NumBits = ElemTy.getSizeInBits();
6516 SDValue WidthMinus1 =
6517 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6518 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6519 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6520 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6521 }
6522
6523 // Compute with: cttz(x) = ctpop(lsb - 1)
6524
6525 // Compute LSB - 1.
6526 SDValue Bits;
6527 if (ElemTy == MVT::i64) {
6528 // Load constant 0xffff'ffff'ffff'ffff to register.
6529 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6530 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6531 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6532 } else {
6533 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6534 DAG.getTargetConstant(1, dl, ElemTy));
6535 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6536 }
6537 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6538 }
6539
6540 if (!ST->hasV6T2Ops())
6541 return SDValue();
6542
6543 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6544 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6545}
6546
6547static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6548 const ARMSubtarget *ST) {
6549 EVT VT = N->getValueType(0);
6550 SDLoc DL(N);
6551
6552 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6553 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6554 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6555 "Unexpected type for custom ctpop lowering");
6556
6557 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6558 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6559 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6560 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6561
6562 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
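 // For example, for VT == v4i32 the v16i8 ctpop is widened with
 // vpaddl.u8 (v16i8 -> v8i16) and then vpaddl.u16 (v8i16 -> v4i32).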
6563 unsigned EltSize = 8;
6564 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6565 while (EltSize != VT.getScalarSizeInBits()) {
6566 SmallVector<SDValue, 8> Ops;
6567 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6568 TLI.getPointerTy(DAG.getDataLayout())));
6569 Ops.push_back(Res);
6570
6571 EltSize *= 2;
6572 NumElts /= 2;
6573 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6574 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6575 }
6576
6577 return Res;
6578}
6579
6580/// getVShiftImm - Check if this is a valid build_vector for the immediate
6581/// operand of a vector shift operation, where all the elements of the
6582/// build_vector must have the same constant integer value.
6583static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6584 // Ignore bit_converts.
6585 while (Op.getOpcode() == ISD::BITCAST)
6586 Op = Op.getOperand(0);
6587 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6588 APInt SplatBits, SplatUndef;
6589 unsigned SplatBitSize;
6590 bool HasAnyUndefs;
6591 if (!BVN ||
6592 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6593 ElementBits) ||
6594 SplatBitSize > ElementBits)
6595 return false;
6596 Cnt = SplatBits.getSExtValue();
6597 return true;
6598}
6599
6600/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6601/// operand of a vector shift left operation. That value must be in the range:
6602/// 0 <= Value < ElementBits for a left shift; or
6603/// 0 <= Value <= ElementBits for a long left shift.
6604static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6605 assert(VT.isVector() && "vector shift count is not a vector type");
6606 int64_t ElementBits = VT.getScalarSizeInBits();
6607 if (!getVShiftImm(Op, ElementBits, Cnt))
6608 return false;
6609 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6610}
6611
6612/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6613/// operand of a vector shift right operation. For a shift opcode, the value
6614/// is positive, but for an intrinsic the value count must be negative. The
6615/// absolute value must be in the range:
6616/// 1 <= |Value| <= ElementBits for a right shift; or
6617/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6618static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6619 int64_t &Cnt) {
6620 assert(VT.isVector() && "vector shift count is not a vector type");
6621 int64_t ElementBits = VT.getScalarSizeInBits();
6622 if (!getVShiftImm(Op, ElementBits, Cnt))
6623 return false;
6624 if (!isIntrinsic)
6625 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6626 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6627 Cnt = -Cnt;
6628 return true;
6629 }
6630 return false;
6631}
6632
6633static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6634 const ARMSubtarget *ST) {
6635 EVT VT = N->getValueType(0);
6636 SDLoc dl(N);
6637 int64_t Cnt;
6638
6639 if (!VT.isVector())
6640 return SDValue();
6641
6642 // We essentially have two forms here. Shift by an immediate and shift by a
6643 // vector register (there are also shift by a gpr, but that is just handled
6644 // with a tablegen pattern). We cannot easily match shift by an immediate in
6645 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6646 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6647 // signed or unsigned, and a negative shift indicates a shift right).
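 // For example, an SRL by a non-constant vector amount is emitted below as
 // VSHLu(X, 0 - Amt), i.e. a left shift by the negated amount.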
6648 if (N->getOpcode() == ISD::SHL) {
6649 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6650 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6651 DAG.getConstant(Cnt, dl, MVT::i32));
6652 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6653 N->getOperand(1));
6654 }
6655
6656 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6657 "unexpected vector shift opcode");
6658
6659 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6660 unsigned VShiftOpc =
6661 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6662 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6663 DAG.getConstant(Cnt, dl, MVT::i32));
6664 }
6665
6666 // Other right shifts we don't have operations for (we use a shift left by a
6667 // negative number).
6668 EVT ShiftVT = N->getOperand(1).getValueType();
6669 SDValue NegatedCount = DAG.getNode(
6670 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6671 unsigned VShiftOpc =
6672 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6673 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6674}
6675
6676static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6677 const ARMSubtarget *ST) {
6678 EVT VT = N->getValueType(0);
6679 SDLoc dl(N);
6680
6681 // We can get here for a node like i32 = ISD::SHL i32, i64
6682 if (VT != MVT::i64)
6683 return SDValue();
6684
6685 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6686 N->getOpcode() == ISD::SHL) &&
6687 "Unknown shift to lower!");
6688
6689 unsigned ShOpc = N->getOpcode();
6690 if (ST->hasMVEIntegerOps()) {
6691 SDValue ShAmt = N->getOperand(1);
6692 unsigned ShPartsOpc = ARMISD::LSLL;
6693 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6694
6695 // If the shift amount is a constant that is zero or at least 32, or is a
6696 // non-constant wider than 64 bits, then fall back to the default expansion.
6697 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6698 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6699 return SDValue();
6700
6701 // Extract the lower 32 bits of the shift amount if it's not an i32
6702 if (ShAmt->getValueType(0) != MVT::i32)
6703 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6704
6705 if (ShOpc == ISD::SRL) {
6706 if (!Con)
6707 // There is no t2LSRLr instruction so negate and perform an lsll if the
6708 // shift amount is in a register, emulating a right shift.
6709 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6710 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6711 else
6712 // Else generate an lsrl on the immediate shift amount
6713 ShPartsOpc = ARMISD::LSRL;
6714 } else if (ShOpc == ISD::SRA)
6715 ShPartsOpc = ARMISD::ASRL;
6716
6717 // Split Lower/Upper 32 bits of the destination/source
6718 SDValue Lo, Hi;
6719 std::tie(Lo, Hi) =
6720 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6721 // Generate the shift operation as computed above
6722 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6723 ShAmt);
6724 // The upper 32 bits come from the second return value of lsll
6725 Hi = SDValue(Lo.getNode(), 1);
6726 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6727 }
6728
6729 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6730 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6731 return SDValue();
6732
6733 // If we are in thumb mode, we don't have RRX.
6734 if (ST->isThumb1Only())
6735 return SDValue();
6736
6737 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6738 SDValue Lo, Hi;
6739 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6740
6741 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6742 // captures the result into a carry flag.
6743 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6744 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6745
6746 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6747 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6748
6749 // Merge the pieces into a single i64 value.
6750 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6751}
6752
6753static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6754 const ARMSubtarget *ST) {
6755 bool Invert = false;
6756 bool Swap = false;
6757 unsigned Opc = ARMCC::AL;
6758
6759 SDValue Op0 = Op.getOperand(0);
6760 SDValue Op1 = Op.getOperand(1);
6761 SDValue CC = Op.getOperand(2);
6762 EVT VT = Op.getValueType();
6763 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6764 SDLoc dl(Op);
6765
6766 EVT CmpVT;
6767 if (ST->hasNEON())
6768 CmpVT = VT.changeVectorElementTypeToInteger();
6769 else {
6770 assert(ST->hasMVEIntegerOps() &&
6771 "No hardware support for integer vector comparison!");
6772
6773 if (Op.getValueType().getVectorElementType() != MVT::i1)
6774 return SDValue();
6775
6776 // Make sure we expand floating point setcc to scalar if we do not have
6777 // mve.fp, so that we can handle them from there.
6778 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6779 return SDValue();
6780
6781 CmpVT = VT;
6782 }
6783
6784 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6785 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6786 // Special-case integer 64-bit equality comparisons. They aren't legal,
6787 // but they can be lowered with a few vector instructions.
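 // Each 64-bit lane is compared as two 32-bit lanes; ANDing the result with its
 // VREV64-swapped copy leaves a lane all-ones only if both 32-bit halves matched.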
6788 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6789 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6790 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6791 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6792 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6793 DAG.getCondCode(ISD::SETEQ));
6794 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6795 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6796 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6797 if (SetCCOpcode == ISD::SETNE)
6798 Merged = DAG.getNOT(dl, Merged, CmpVT);
6799 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6800 return Merged;
6801 }
6802
6803 if (CmpVT.getVectorElementType() == MVT::i64)
6804 // 64-bit comparisons are not legal in general.
6805 return SDValue();
6806
6807 if (Op1.getValueType().isFloatingPoint()) {
6808 switch (SetCCOpcode) {
6809 default: llvm_unreachable("Illegal FP comparison");
6810 case ISD::SETUNE:
6811 case ISD::SETNE:
6812 if (ST->hasMVEFloatOps()) {
6813 Opc = ARMCC::NE; break;
6814 } else {
6815 Invert = true; [[fallthrough]];
6816 }
6817 case ISD::SETOEQ:
6818 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6819 case ISD::SETOLT:
6820 case ISD::SETLT: Swap = true; [[fallthrough]];
6821 case ISD::SETOGT:
6822 case ISD::SETGT: Opc = ARMCC::GT; break;
6823 case ISD::SETOLE:
6824 case ISD::SETLE: Swap = true; [[fallthrough]];
6825 case ISD::SETOGE:
6826 case ISD::SETGE: Opc = ARMCC::GE; break;
6827 case ISD::SETUGE: Swap = true; [[fallthrough]];
6828 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6829 case ISD::SETUGT: Swap = true; [[fallthrough]];
6830 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6831 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6832 case ISD::SETONE: {
6833 // Expand this to (OLT | OGT).
6834 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6835 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6836 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6837 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6838 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6839 if (Invert)
6840 Result = DAG.getNOT(dl, Result, VT);
6841 return Result;
6842 }
6843 case ISD::SETUO: Invert = true; [[fallthrough]];
6844 case ISD::SETO: {
6845 // Expand this to (OLT | OGE).
6846 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6847 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6848 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6849 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6850 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6851 if (Invert)
6852 Result = DAG.getNOT(dl, Result, VT);
6853 return Result;
6854 }
6855 }
6856 } else {
6857 // Integer comparisons.
6858 switch (SetCCOpcode) {
6859 default: llvm_unreachable("Illegal integer comparison");
6860 case ISD::SETNE:
6861 if (ST->hasMVEIntegerOps()) {
6862 Opc = ARMCC::NE; break;
6863 } else {
6864 Invert = true; [[fallthrough]];
6865 }
6866 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6867 case ISD::SETLT: Swap = true; [[fallthrough]];
6868 case ISD::SETGT: Opc = ARMCC::GT; break;
6869 case ISD::SETLE: Swap = true; [[fallthrough]];
6870 case ISD::SETGE: Opc = ARMCC::GE; break;
6871 case ISD::SETULT: Swap = true; [[fallthrough]];
6872 case ISD::SETUGT: Opc = ARMCC::HI; break;
6873 case ISD::SETULE: Swap = true; [[fallthrough]];
6874 case ISD::SETUGE: Opc = ARMCC::HS; break;
6875 }
6876
6877 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
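// (When the original condition was NE, Invert is already set above, so the
// raw VTST result is used directly; for EQ the NOT below is applied to it.)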
6878 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6879 SDValue AndOp;
6880 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6881 AndOp = Op0;
6882 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6883 AndOp = Op1;
6884
6885 // Ignore bitconvert.
6886 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6887 AndOp = AndOp.getOperand(0);
6888
6889 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6890 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6891 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6892 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6893 if (!Invert)
6894 Result = DAG.getNOT(dl, Result, VT);
6895 return Result;
6896 }
6897 }
6898 }
6899
6900 if (Swap)
6901 std::swap(Op0, Op1);
6902
6903 // If one of the operands is a constant vector zero, attempt to fold the
6904 // comparison to a specialized compare-against-zero form.
6905 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6906 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6907 Opc == ARMCC::NE)) {
6908 if (Opc == ARMCC::GE)
6909 Opc = ARMCC::LE;
6910 else if (Opc == ARMCC::GT)
6911 Opc = ARMCC::LT;
6912 std::swap(Op0, Op1);
6913 }
6914
6915 SDValue Result;
6916 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6917 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6918 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6919 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6920 DAG.getConstant(Opc, dl, MVT::i32));
6921 else
6922 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6923 DAG.getConstant(Opc, dl, MVT::i32));
6924
6925 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6926
6927 if (Invert)
6928 Result = DAG.getNOT(dl, Result, VT);
6929
6930 return Result;
6931}
6932
6933 static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6934 SDValue LHS = Op.getOperand(0);
6935 SDValue RHS = Op.getOperand(1);
6936 SDValue Carry = Op.getOperand(2);
6937 SDValue Cond = Op.getOperand(3);
6938 SDLoc DL(Op);
6939
6940 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6941
6942 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6943 // have to invert the carry first.
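// (A borrow of 0 corresponds to a carry of 1 and vice versa, i.e. C = 1 - B.)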
6944 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6945 DAG.getConstant(1, DL, MVT::i32), Carry);
6946 // This converts the boolean value carry into the carry flag.
6947 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6948
6949 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6950 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6951
6952 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6953 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6954 SDValue ARMcc = DAG.getConstant(
6955 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6956 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6957 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6958 Cmp.getValue(1), SDValue());
6959 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6960 CCR, Chain.getValue(1));
6961}
6962
6963/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6964/// valid vector constant for a NEON or MVE instruction with a "modified
6965/// immediate" operand (e.g., VMOV). If so, return the encoded value.
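/// For example, a v8i16 splat of 0x1200 (SplatBitSize == 16) matches the
/// 0xnn00 case below and is encoded with Cmode 101x and Imm 0x12, while a
/// splat such as 0x1234, which needs two nonzero bytes per element, is
/// rejected here and left to other lowering.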
6966static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6967 unsigned SplatBitSize, SelectionDAG &DAG,
6968 const SDLoc &dl, EVT &VT, EVT VectorVT,
6969 VMOVModImmType type) {
6970 unsigned OpCmode, Imm;
6971 bool is128Bits = VectorVT.is128BitVector();
6972
6973 // SplatBitSize is set to the smallest size that splats the vector, so a
6974 // zero vector will always have SplatBitSize == 8. However, NEON modified
6975 // immediate instructions other than VMOV do not support the 8-bit encoding
6976 // of a zero vector, and the default encoding of zero is supposed to be the
6977 // 32-bit version.
6978 if (SplatBits == 0)
6979 SplatBitSize = 32;
6980
6981 switch (SplatBitSize) {
6982 case 8:
6983 if (type != VMOVModImm)
6984 return SDValue();
6985 // Any 1-byte value is OK. Op=0, Cmode=1110.
6986 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6987 OpCmode = 0xe;
6988 Imm = SplatBits;
6989 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6990 break;
6991
6992 case 16:
6993 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6994 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6995 if ((SplatBits & ~0xff) == 0) {
6996 // Value = 0x00nn: Op=x, Cmode=100x.
6997 OpCmode = 0x8;
6998 Imm = SplatBits;
6999 break;
7000 }
7001 if ((SplatBits & ~0xff00) == 0) {
7002 // Value = 0xnn00: Op=x, Cmode=101x.
7003 OpCmode = 0xa;
7004 Imm = SplatBits >> 8;
7005 break;
7006 }
7007 return SDValue();
7008
7009 case 32:
7010 // NEON's 32-bit VMOV supports splat values where:
7011 // * only one byte is nonzero, or
7012 // * the least significant byte is 0xff and the second byte is nonzero, or
7013 // * the least significant 2 bytes are 0xff and the third is nonzero.
7014 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7015 if ((SplatBits & ~0xff) == 0) {
7016 // Value = 0x000000nn: Op=x, Cmode=000x.
7017 OpCmode = 0;
7018 Imm = SplatBits;
7019 break;
7020 }
7021 if ((SplatBits & ~0xff00) == 0) {
7022 // Value = 0x0000nn00: Op=x, Cmode=001x.
7023 OpCmode = 0x2;
7024 Imm = SplatBits >> 8;
7025 break;
7026 }
7027 if ((SplatBits & ~0xff0000) == 0) {
7028 // Value = 0x00nn0000: Op=x, Cmode=010x.
7029 OpCmode = 0x4;
7030 Imm = SplatBits >> 16;
7031 break;
7032 }
7033 if ((SplatBits & ~0xff000000) == 0) {
7034 // Value = 0xnn000000: Op=x, Cmode=011x.
7035 OpCmode = 0x6;
7036 Imm = SplatBits >> 24;
7037 break;
7038 }
7039
7040 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7041 if (type == OtherModImm) return SDValue();
7042
7043 if ((SplatBits & ~0xffff) == 0 &&
7044 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7045 // Value = 0x0000nnff: Op=x, Cmode=1100.
7046 OpCmode = 0xc;
7047 Imm = SplatBits >> 8;
7048 break;
7049 }
7050
7051 // cmode == 0b1101 is not supported for MVE VMVN
7052 if (type == MVEVMVNModImm)
7053 return SDValue();
7054
7055 if ((SplatBits & ~0xffffff) == 0 &&
7056 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7057 // Value = 0x00nnffff: Op=x, Cmode=1101.
7058 OpCmode = 0xd;
7059 Imm = SplatBits >> 16;
7060 break;
7061 }
7062
7063 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7064 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7065 // VMOV.I32. A (very) minor optimization would be to replicate the value
7066 // and fall through here to test for a valid 64-bit splat. But, then the
7067 // caller would also need to check and handle the change in size.
7068 return SDValue();
7069
7070 case 64: {
7071 if (type != VMOVModImm)
7072 return SDValue();
7073 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7074 uint64_t BitMask = 0xff;
7075 unsigned ImmMask = 1;
7076 Imm = 0;
7077 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7078 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7079 Imm |= ImmMask;
7080 } else if ((SplatBits & BitMask) != 0) {
7081 return SDValue();
7082 }
7083 BitMask <<= 8;
7084 ImmMask <<= 1;
7085 }
7086
7087 if (DAG.getDataLayout().isBigEndian()) {
7088 // Reverse the order of elements within the vector.
7089 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7090 unsigned Mask = (1 << BytesPerElem) - 1;
7091 unsigned NumElems = 8 / BytesPerElem;
7092 unsigned NewImm = 0;
7093 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7094 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7095 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7096 }
7097 Imm = NewImm;
7098 }
7099
7100 // Op=1, Cmode=1110.
7101 OpCmode = 0x1e;
7102 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7103 break;
7104 }
7105
7106 default:
7107 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7108 }
7109
7110 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7111 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7112}
7113
7114SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7115 const ARMSubtarget *ST) const {
7116 EVT VT = Op.getValueType();
7117 bool IsDouble = (VT == MVT::f64);
7118 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7119 const APFloat &FPVal = CFP->getValueAPF();
7120
7121 // Prevent floating-point constants from using literal loads
7122 // when execute-only is enabled.
7123 if (ST->genExecuteOnly()) {
7124 // We shouldn't trigger this for v6m execute-only
7125 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7126 "Unexpected architecture");
7127
7128 // If we can represent the constant as an immediate, don't lower it
7129 if (isFPImmLegal(FPVal, VT))
7130 return Op;
7131 // Otherwise, construct as integer, and move to float register
7132 APInt INTVal = FPVal.bitcastToAPInt();
7133 SDLoc DL(CFP);
7134 switch (VT.getSimpleVT().SimpleTy) {
7135 default:
7136 llvm_unreachable("Unknown floating point type!");
7137 break;
7138 case MVT::f64: {
7139 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7140 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7141 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7142 }
7143 case MVT::f32:
7144 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7145 DAG.getConstant(INTVal, DL, MVT::i32));
7146 }
7147 }
7148
7149 if (!ST->hasVFP3Base())
7150 return SDValue();
7151
7152 // Use the default (constant pool) lowering for double constants when we have
7153 // an SP-only FPU
7154 if (IsDouble && !Subtarget->hasFP64())
7155 return SDValue();
7156
7157 // Try splatting with a VMOV.f32...
7158 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7159
7160 if (ImmVal != -1) {
7161 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7162 // We have code in place to select a valid ConstantFP already, no need to
7163 // do any mangling.
7164 return Op;
7165 }
7166
7167 // It's a float and we are trying to use NEON operations where
7168 // possible. Lower it to a splat followed by an extract.
7169 SDLoc DL(Op);
7170 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7171 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7172 NewVal);
7173 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7174 DAG.getConstant(0, DL, MVT::i32));
7175 }
7176
7177 // The rest of our options are NEON only, make sure that's allowed before
7178 // proceeding..
7179 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7180 return SDValue();
7181
7182 EVT VMovVT;
7183 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7184
7185 // It wouldn't really be worth bothering for doubles except for one very
7186 // important value, which does happen to match: 0.0. So make sure we don't do
7187 // anything stupid.
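// (+0.0 is that case: iVal is 0, both halves match, and the VMOV.i32 path
// below builds an all-zero vector that is simply bitcast back to f64.)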
7188 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7189 return SDValue();
7190
7191 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7192 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7193 VMovVT, VT, VMOVModImm);
7194 if (NewVal != SDValue()) {
7195 SDLoc DL(Op);
7196 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7197 NewVal);
7198 if (IsDouble)
7199 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7200
7201 // It's a float: cast and extract a vector element.
7202 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7203 VecConstant);
7204 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7205 DAG.getConstant(0, DL, MVT::i32));
7206 }
7207
7208 // Finally, try a VMVN.i32
7209 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7210 VT, VMVNModImm);
7211 if (NewVal != SDValue()) {
7212 SDLoc DL(Op);
7213 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7214
7215 if (IsDouble)
7216 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7217
7218 // It's a float: cast and extract a vector element.
7219 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7220 VecConstant);
7221 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7222 DAG.getConstant(0, DL, MVT::i32));
7223 }
7224
7225 return SDValue();
7226}
7227
7228 // Check if a VEXT instruction can handle the shuffle mask when the
7229// vector sources of the shuffle are the same.
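// For example, for v8i8 the mask <3, 4, 5, 6, 7, 0, 1, 2> is a single-source
// VEXT with Imm == 3; the expected indices simply wrap around past the last
// element.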
7230static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7231 unsigned NumElts = VT.getVectorNumElements();
7232
7233 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7234 if (M[0] < 0)
7235 return false;
7236
7237 Imm = M[0];
7238
7239 // If this is a VEXT shuffle, the immediate value is the index of the first
7240 // element. The other shuffle indices must be the successive elements after
7241 // the first one.
7242 unsigned ExpectedElt = Imm;
7243 for (unsigned i = 1; i < NumElts; ++i) {
7244 // Increment the expected index. If it wraps around, just follow it
7245 // back to index zero and keep going.
7246 ++ExpectedElt;
7247 if (ExpectedElt == NumElts)
7248 ExpectedElt = 0;
7249
7250 if (M[i] < 0) continue; // ignore UNDEF indices
7251 if (ExpectedElt != static_cast<unsigned>(M[i]))
7252 return false;
7253 }
7254
7255 return true;
7256}
7257
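// Check if a VEXT instruction can handle the shuffle mask when the two
// vector sources of the shuffle are different. For example, for v8i8 the
// mask <6, 7, 8, 9, 10, 11, 12, 13> is VEXT(V1, V2) with Imm == 6, while
// <14, 15, 0, 1, 2, 3, 4, 5> wraps past the end of the concatenation, so the
// sources are swapped (ReverseVEXT) and Imm is adjusted back down to 6.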
7258static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7259 bool &ReverseVEXT, unsigned &Imm) {
7260 unsigned NumElts = VT.getVectorNumElements();
7261 ReverseVEXT = false;
7262
7263 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7264 if (M[0] < 0)
7265 return false;
7266
7267 Imm = M[0];
7268
7269 // If this is a VEXT shuffle, the immediate value is the index of the first
7270 // element. The other shuffle indices must be the successive elements after
7271 // the first one.
7272 unsigned ExpectedElt = Imm;
7273 for (unsigned i = 1; i < NumElts; ++i) {
7274 // Increment the expected index. If it wraps around, it may still be
7275 // a VEXT but the source vectors must be swapped.
7276 ExpectedElt += 1;
7277 if (ExpectedElt == NumElts * 2) {
7278 ExpectedElt = 0;
7279 ReverseVEXT = true;
7280 }
7281
7282 if (M[i] < 0) continue; // ignore UNDEF indices
7283 if (ExpectedElt != static_cast<unsigned>(M[i]))
7284 return false;
7285 }
7286
7287 // Adjust the index value if the source operands will be swapped.
7288 if (ReverseVEXT)
7289 Imm -= NumElts;
7290
7291 return true;
7292}
7293
7294static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7295 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7296 // range, then 0 is placed into the resulting vector. So pretty much any mask
7297 // of 8 elements can work here.
7298 return VT == MVT::v8i8 && M.size() == 8;
7299}
7300
7301static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7302 unsigned Index) {
7303 if (Mask.size() == Elements * 2)
7304 return Index / Elements;
7305 return Mask[Index] == 0 ? 0 : 1;
7306}
7307
7308// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7309// checking that pairs of elements in the shuffle mask represent the same index
7310// in each vector, incrementing the expected index by 2 at each step.
7311// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7312// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7313// v2={e,f,g,h}
7314// WhichResult gives the offset for each element in the mask based on which
7315// of the two results it belongs to.
7316//
7317// The transpose can be represented either as:
7318// result1 = shufflevector v1, v2, result1_shuffle_mask
7319// result2 = shufflevector v1, v2, result2_shuffle_mask
7320// where v1/v2 and the shuffle masks have the same number of elements
7321// (here WhichResult (see below) indicates which result is being checked)
7322//
7323// or as:
7324// results = shufflevector v1, v2, shuffle_mask
7325// where both results are returned in one vector and the shuffle mask has twice
8326 // as many elements as v1/v2 (here WhichResult will always be 0 if true); in this
8327 // case we check the low half and the high half of the shuffle mask as if each
8328 // were a mask of the first form.
7329static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7330 unsigned EltSz = VT.getScalarSizeInBits();
7331 if (EltSz == 64)
7332 return false;
7333
7334 unsigned NumElts = VT.getVectorNumElements();
7335 if (M.size() != NumElts && M.size() != NumElts*2)
7336 return false;
7337
7338 // If the mask is twice as long as the input vector then we need to check the
7339 // upper and lower parts of the mask with a matching value for WhichResult
7340 // FIXME: A mask with only even values will be rejected in case the first
7341 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7342 // M[0] is used to determine WhichResult
7343 for (unsigned i = 0; i < M.size(); i += NumElts) {
7344 WhichResult = SelectPairHalf(NumElts, M, i);
7345 for (unsigned j = 0; j < NumElts; j += 2) {
7346 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7347 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7348 return false;
7349 }
7350 }
7351
7352 if (M.size() == NumElts*2)
7353 WhichResult = 0;
7354
7355 return true;
7356}
7357
7358/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7359/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7360/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7361static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7362 unsigned EltSz = VT.getScalarSizeInBits();
7363 if (EltSz == 64)
7364 return false;
7365
7366 unsigned NumElts = VT.getVectorNumElements();
7367 if (M.size() != NumElts && M.size() != NumElts*2)
7368 return false;
7369
7370 for (unsigned i = 0; i < M.size(); i += NumElts) {
7371 WhichResult = SelectPairHalf(NumElts, M, i);
7372 for (unsigned j = 0; j < NumElts; j += 2) {
7373 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7374 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7375 return false;
7376 }
7377 }
7378
7379 if (M.size() == NumElts*2)
7380 WhichResult = 0;
7381
7382 return true;
7383}
7384
7385// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7386// that the mask elements are either all even and in steps of size 2 or all odd
7387// and in steps of size 2.
7388// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7389// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7390// v2={e,f,g,h}
7391 // Requires similar checks to those of isVTRNMask with
7392 // respect to how the results are returned.
7393static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7394 unsigned EltSz = VT.getScalarSizeInBits();
7395 if (EltSz == 64)
7396 return false;
7397
7398 unsigned NumElts = VT.getVectorNumElements();
7399 if (M.size() != NumElts && M.size() != NumElts*2)
7400 return false;
7401
7402 for (unsigned i = 0; i < M.size(); i += NumElts) {
7403 WhichResult = SelectPairHalf(NumElts, M, i);
7404 for (unsigned j = 0; j < NumElts; ++j) {
7405 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7406 return false;
7407 }
7408 }
7409
7410 if (M.size() == NumElts*2)
7411 WhichResult = 0;
7412
7413 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7414 if (VT.is64BitVector() && EltSz == 32)
7415 return false;
7416
7417 return true;
7418}
7419
7420/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7421/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7422 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7423static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7424 unsigned EltSz = VT.getScalarSizeInBits();
7425 if (EltSz == 64)
7426 return false;
7427
7428 unsigned NumElts = VT.getVectorNumElements();
7429 if (M.size() != NumElts && M.size() != NumElts*2)
7430 return false;
7431
7432 unsigned Half = NumElts / 2;
7433 for (unsigned i = 0; i < M.size(); i += NumElts) {
7434 WhichResult = SelectPairHalf(NumElts, M, i);
7435 for (unsigned j = 0; j < NumElts; j += Half) {
7436 unsigned Idx = WhichResult;
7437 for (unsigned k = 0; k < Half; ++k) {
7438 int MIdx = M[i + j + k];
7439 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7440 return false;
7441 Idx += 2;
7442 }
7443 }
7444 }
7445
7446 if (M.size() == NumElts*2)
7447 WhichResult = 0;
7448
7449 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7450 if (VT.is64BitVector() && EltSz == 32)
7451 return false;
7452
7453 return true;
7454}
7455
7456// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7457// that pairs of elements of the shufflemask represent the same index in each
7458// vector incrementing sequentially through the vectors.
7459// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7460// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7461// v2={e,f,g,h}
7462 // Requires similar checks to those of isVTRNMask with respect to how the results
7463// are returned.
7464static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7465 unsigned EltSz = VT.getScalarSizeInBits();
7466 if (EltSz == 64)
7467 return false;
7468
7469 unsigned NumElts = VT.getVectorNumElements();
7470 if (M.size() != NumElts && M.size() != NumElts*2)
7471 return false;
7472
7473 for (unsigned i = 0; i < M.size(); i += NumElts) {
7474 WhichResult = SelectPairHalf(NumElts, M, i);
7475 unsigned Idx = WhichResult * NumElts / 2;
7476 for (unsigned j = 0; j < NumElts; j += 2) {
7477 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7478 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7479 return false;
7480 Idx += 1;
7481 }
7482 }
7483
7484 if (M.size() == NumElts*2)
7485 WhichResult = 0;
7486
7487 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7488 if (VT.is64BitVector() && EltSz == 32)
7489 return false;
7490
7491 return true;
7492}
7493
7494/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7495/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7496/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7497static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7498 unsigned EltSz = VT.getScalarSizeInBits();
7499 if (EltSz == 64)
7500 return false;
7501
7502 unsigned NumElts = VT.getVectorNumElements();
7503 if (M.size() != NumElts && M.size() != NumElts*2)
7504 return false;
7505
7506 for (unsigned i = 0; i < M.size(); i += NumElts) {
7507 WhichResult = SelectPairHalf(NumElts, M, i);
7508 unsigned Idx = WhichResult * NumElts / 2;
7509 for (unsigned j = 0; j < NumElts; j += 2) {
7510 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7511 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7512 return false;
7513 Idx += 1;
7514 }
7515 }
7516
7517 if (M.size() == NumElts*2)
7518 WhichResult = 0;
7519
7520 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7521 if (VT.is64BitVector() && EltSz == 32)
7522 return false;
7523
7524 return true;
7525}
7526
7527/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7528/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7529static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7530 unsigned &WhichResult,
7531 bool &isV_UNDEF) {
7532 isV_UNDEF = false;
7533 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7534 return ARMISD::VTRN;
7535 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7536 return ARMISD::VUZP;
7537 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7538 return ARMISD::VZIP;
7539
7540 isV_UNDEF = true;
7541 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7542 return ARMISD::VTRN;
7543 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7544 return ARMISD::VUZP;
7545 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7546 return ARMISD::VZIP;
7547
7548 return 0;
7549}
7550
7551 /// \return true if this is a reverse operation on a vector.
7552static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7553 unsigned NumElts = VT.getVectorNumElements();
7554 // Make sure the mask has the right size.
7555 if (NumElts != M.size())
7556 return false;
7557
7558 // Look for <15, ..., 3, -1, 1, 0>.
7559 for (unsigned i = 0; i != NumElts; ++i)
7560 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7561 return false;
7562
7563 return true;
7564}
7565
7566static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7567 unsigned NumElts = VT.getVectorNumElements();
7568 // Make sure the mask has the right size.
7569 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7570 return false;
7571
7572 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7573 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7574 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7575 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7576 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7577 int Ofs = Top ? 1 : 0;
7578 int Upper = SingleSource ? 0 : NumElts;
7579 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7580 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7581 return false;
7582 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7583 return false;
7584 }
7585 return true;
7586}
7587
7588static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7589 unsigned NumElts = VT.getVectorNumElements();
7590 // Make sure the mask has the right size.
7591 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7592 return false;
7593
7594 // If Top
7595 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7596 // This inserts Input2 into Input1
7597 // else if not Top
7598 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7599 // This inserts Input1 into Input2
7600 unsigned Offset = Top ? 0 : 1;
7601 unsigned N = SingleSource ? 0 : NumElts;
7602 for (unsigned i = 0; i < NumElts; i += 2) {
7603 if (M[i] >= 0 && M[i] != (int)i)
7604 return false;
7605 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7606 return false;
7607 }
7608
7609 return true;
7610}
7611
7612static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7613 unsigned NumElts = ToVT.getVectorNumElements();
7614 if (NumElts != M.size())
7615 return false;
7616
7617 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7618 // looking for patterns of:
7619 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7620 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7621
7622 unsigned Off0 = rev ? NumElts / 2 : 0;
7623 unsigned Off1 = rev ? 0 : NumElts / 2;
7624 for (unsigned i = 0; i < NumElts; i += 2) {
7625 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7626 return false;
7627 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7628 return false;
7629 }
7630
7631 return true;
7632}
7633
7634// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7635// from a pair of inputs. For example:
7636// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7637// FP_ROUND(EXTRACT_ELT(Y, 0),
7638// FP_ROUND(EXTRACT_ELT(X, 1),
7639// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7640 static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7641 const ARMSubtarget *ST) {
7642 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7643 if (!ST->hasMVEFloatOps())
7644 return SDValue();
7645
7646 SDLoc dl(BV);
7647 EVT VT = BV.getValueType();
7648 if (VT != MVT::v8f16)
7649 return SDValue();
7650
7651 // We are looking for a buildvector of fptrunc elements, where all the
7652 // elements are interleavingly extracted from two sources. Check the first two
7653 // items are valid enough and extract some info from them (they are checked
7654 // properly in the loop below).
7655 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7656 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7657 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7658 return SDValue();
7659 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7660 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7661 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7662 return SDValue();
7663 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7664 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7665 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7666 return SDValue();
7667
7668 // Check all the values in the BuildVector line up with our expectations.
7669 for (unsigned i = 1; i < 4; i++) {
7670 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7671 return Trunc.getOpcode() == ISD::FP_ROUND &&
7672 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7673 Trunc.getOperand(0).getOperand(0) == Op &&
7674 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7675 };
7676 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7677 return SDValue();
7678 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7679 return SDValue();
7680 }
7681
7682 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7683 DAG.getConstant(0, dl, MVT::i32));
7684 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7685 DAG.getConstant(1, dl, MVT::i32));
7686}
7687
7688// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7689// from a single input on alternating lanes. For example:
7690// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7691// FP_ROUND(EXTRACT_ELT(X, 2),
7692// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7693 static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7694 const ARMSubtarget *ST) {
7695 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7696 if (!ST->hasMVEFloatOps())
7697 return SDValue();
7698
7699 SDLoc dl(BV);
7700 EVT VT = BV.getValueType();
7701 if (VT != MVT::v4f32)
7702 return SDValue();
7703
7704 // We are looking for a buildvector of fpext elements, where all the
7705 // elements are alternating lanes from a single source. For example <0,2,4,6>
7706 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7707 // info from them (they are checked properly in the loop below).
7708 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7709 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7710 return SDValue();
7711 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7712 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7713 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7714 return SDValue();
7715
7716 // Check all the values in the BuildVector line up with our expectations.
7717 for (unsigned i = 1; i < 4; i++) {
7718 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7719 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7720 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7721 Trunc.getOperand(0).getOperand(0) == Op &&
7722 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7723 };
7724 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7725 return SDValue();
7726 }
7727
7728 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7729 DAG.getConstant(Offset, dl, MVT::i32));
7730}
7731
7732// If N is an integer constant that can be moved into a register in one
7733// instruction, return an SDValue of such a constant (will become a MOV
7734// instruction). Otherwise return null.
7735 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7736 const ARMSubtarget *ST, const SDLoc &dl) {
7737 uint64_t Val;
7738 if (!isa<ConstantSDNode>(N))
7739 return SDValue();
7740 Val = N->getAsZExtVal();
7741
7742 if (ST->isThumb1Only()) {
7743 if (Val <= 255 || ~Val <= 255)
7744 return DAG.getConstant(Val, dl, MVT::i32);
7745 } else {
7746 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7747 return DAG.getConstant(Val, dl, MVT::i32);
7748 }
7749 return SDValue();
7750}
7751
7752 static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7753 const ARMSubtarget *ST) {
7754 SDLoc dl(Op);
7755 EVT VT = Op.getValueType();
7756
7757 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7758
7759 unsigned NumElts = VT.getVectorNumElements();
7760 unsigned BoolMask;
7761 unsigned BitsPerBool;
7762 if (NumElts == 2) {
7763 BitsPerBool = 8;
7764 BoolMask = 0xff;
7765 } else if (NumElts == 4) {
7766 BitsPerBool = 4;
7767 BoolMask = 0xf;
7768 } else if (NumElts == 8) {
7769 BitsPerBool = 2;
7770 BoolMask = 0x3;
7771 } else if (NumElts == 16) {
7772 BitsPerBool = 1;
7773 BoolMask = 0x1;
7774 } else
7775 return SDValue();
7776
7777 // If this is a single value copied into all lanes (a splat), we can just sign
7778 // extend that single value
7779 SDValue FirstOp = Op.getOperand(0);
7780 if (!isa<ConstantSDNode>(FirstOp) &&
7781 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7782 return U.get().isUndef() || U.get() == FirstOp;
7783 })) {
7784 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7785 DAG.getValueType(MVT::i1));
7786 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7787 }
7788
7789 // First create base with bits set where known
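// (e.g. for a v4i1 constant <1,0,1,1>, BitsPerBool is 4 and Bits32 becomes
// 0xff0f: BoolMask is replicated at the position of each set lane).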
7790 unsigned Bits32 = 0;
7791 for (unsigned i = 0; i < NumElts; ++i) {
7792 SDValue V = Op.getOperand(i);
7793 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7794 continue;
7795 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7796 if (BitSet)
7797 Bits32 |= BoolMask << (i * BitsPerBool);
7798 }
7799
7800 // Add in unknown nodes
7801 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7802 DAG.getConstant(Bits32, dl, MVT::i32));
7803 for (unsigned i = 0; i < NumElts; ++i) {
7804 SDValue V = Op.getOperand(i);
7805 if (isa<ConstantSDNode>(V) || V.isUndef())
7806 continue;
7807 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7808 DAG.getConstant(i, dl, MVT::i32));
7809 }
7810
7811 return Base;
7812}
7813
7814 static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7815 const ARMSubtarget *ST) {
7816 if (!ST->hasMVEIntegerOps())
7817 return SDValue();
7818
7819 // We are looking for a buildvector where each element is Op[0] + i*N
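// (e.g. <x, x+2, x+4, x+6> becomes a VIDUP of x with a step of 2; only the
// steps 1, 2, 4 and 8 accepted below are supported by the instruction).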
7820 EVT VT = Op.getValueType();
7821 SDValue Op0 = Op.getOperand(0);
7822 unsigned NumElts = VT.getVectorNumElements();
7823
7824 // Get the increment value from operand 1
7825 SDValue Op1 = Op.getOperand(1);
7826 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7827 !isa<ConstantSDNode>(Op1.getOperand(1)))
7828 return SDValue();
7829 unsigned N = Op1.getConstantOperandVal(1);
7830 if (N != 1 && N != 2 && N != 4 && N != 8)
7831 return SDValue();
7832
7833 // Check that each other operand matches
7834 for (unsigned I = 2; I < NumElts; I++) {
7835 SDValue OpI = Op.getOperand(I);
7836 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7837 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7838 OpI.getConstantOperandVal(1) != I * N)
7839 return SDValue();
7840 }
7841
7842 SDLoc DL(Op);
7843 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7844 DAG.getConstant(N, DL, MVT::i32));
7845}
7846
7847// Returns true if the operation N can be treated as qr instruction variant at
7848// operand Op.
7849static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7850 switch (N->getOpcode()) {
7851 case ISD::ADD:
7852 case ISD::MUL:
7853 case ISD::SADDSAT:
7854 case ISD::UADDSAT:
7855 return true;
7856 case ISD::SUB:
7857 case ISD::SSUBSAT:
7858 case ISD::USUBSAT:
7859 return N->getOperand(1).getNode() == Op;
7860 case ISD::INTRINSIC_WO_CHAIN:
7861 switch (N->getConstantOperandVal(0)) {
7862 case Intrinsic::arm_mve_add_predicated:
7863 case Intrinsic::arm_mve_mul_predicated:
7864 case Intrinsic::arm_mve_qadd_predicated:
7865 case Intrinsic::arm_mve_vhadd:
7866 case Intrinsic::arm_mve_hadd_predicated:
7867 case Intrinsic::arm_mve_vqdmulh:
7868 case Intrinsic::arm_mve_qdmulh_predicated:
7869 case Intrinsic::arm_mve_vqrdmulh:
7870 case Intrinsic::arm_mve_qrdmulh_predicated:
7871 case Intrinsic::arm_mve_vqdmull:
7872 case Intrinsic::arm_mve_vqdmull_predicated:
7873 return true;
7874 case Intrinsic::arm_mve_sub_predicated:
7875 case Intrinsic::arm_mve_qsub_predicated:
7876 case Intrinsic::arm_mve_vhsub:
7877 case Intrinsic::arm_mve_hsub_predicated:
7878 return N->getOperand(2).getNode() == Op;
7879 default:
7880 return false;
7881 }
7882 default:
7883 return false;
7884 }
7885}
7886
7887// If this is a case we can't handle, return null and let the default
7888// expansion code take care of it.
7889SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7890 const ARMSubtarget *ST) const {
7891 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7892 SDLoc dl(Op);
7893 EVT VT = Op.getValueType();
7894
7895 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7896 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7897
7898 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7899 return R;
7900
7901 APInt SplatBits, SplatUndef;
7902 unsigned SplatBitSize;
7903 bool HasAnyUndefs;
7904 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7905 if (SplatUndef.isAllOnes())
7906 return DAG.getUNDEF(VT);
7907
7908 // If all the users of this constant splat are qr instruction variants,
7909 // generate a vdup of the constant.
7910 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7911 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7912 all_of(BVN->uses(),
7913 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7914 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7915 : SplatBitSize == 16 ? MVT::v8i16
7916 : MVT::v16i8;
7917 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7918 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7919 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7920 }
7921
7922 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7923 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7924 // Check if an immediate VMOV works.
7925 EVT VmovVT;
7926 SDValue Val =
7927 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7928 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7929
7930 if (Val.getNode()) {
7931 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7932 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7933 }
7934
7935 // Try an immediate VMVN.
7936 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7937 Val = isVMOVModifiedImm(
7938 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7939 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7940 if (Val.getNode()) {
7941 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7942 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7943 }
7944
7945 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7946 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7947 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7948 if (ImmVal != -1) {
7949 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7950 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7951 }
7952 }
7953
7954 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7955 // type.
7956 if (ST->hasMVEIntegerOps() &&
7957 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7958 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7959 : SplatBitSize == 16 ? MVT::v8i16
7960 : MVT::v16i8;
7961 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7962 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7963 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7964 }
7965 }
7966 }
7967
7968 // Scan through the operands to see if only one value is used.
7969 //
7970 // As an optimisation, even if more than one value is used it may be more
7971 // profitable to splat with one value then change some lanes.
7972 //
7973 // Heuristically we decide to do this if the vector has a "dominant" value,
7974 // defined as splatted to more than half of the lanes.
7975 unsigned NumElts = VT.getVectorNumElements();
7976 bool isOnlyLowElement = true;
7977 bool usesOnlyOneValue = true;
7978 bool hasDominantValue = false;
7979 bool isConstant = true;
7980
7981 // Map of the number of times a particular SDValue appears in the
7982 // element list.
7983 DenseMap<SDValue, unsigned> ValueCounts;
7984 SDValue Value;
7985 for (unsigned i = 0; i < NumElts; ++i) {
7986 SDValue V = Op.getOperand(i);
7987 if (V.isUndef())
7988 continue;
7989 if (i > 0)
7990 isOnlyLowElement = false;
7991 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7992 isConstant = false;
7993
7994 ValueCounts.insert(std::make_pair(V, 0));
7995 unsigned &Count = ValueCounts[V];
7996
7997 // Is this value dominant? (takes up more than half of the lanes)
7998 if (++Count > (NumElts / 2)) {
7999 hasDominantValue = true;
8000 Value = V;
8001 }
8002 }
8003 if (ValueCounts.size() != 1)
8004 usesOnlyOneValue = false;
8005 if (!Value.getNode() && !ValueCounts.empty())
8006 Value = ValueCounts.begin()->first;
8007
8008 if (ValueCounts.empty())
8009 return DAG.getUNDEF(VT);
8010
8011 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8012 // Keep going if we are hitting this case.
8013 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8014 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8015
8016 unsigned EltSize = VT.getScalarSizeInBits();
8017
8018 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8019 // i32 and try again.
8020 if (hasDominantValue && EltSize <= 32) {
8021 if (!isConstant) {
8022 SDValue N;
8023
8024 // If we are VDUPing a value that comes directly from a vector, that will
8025 // cause an unnecessary move to and from a GPR, where instead we could
8026 // just use VDUPLANE. We can only do this if the lane being extracted
8027 // is at a constant index, as the VDUP from lane instructions only have
8028 // constant-index forms.
8029 ConstantSDNode *constIndex;
8030 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8031 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8032 // We need to create a new undef vector to use for the VDUPLANE if the
8033 // size of the vector from which we get the value is different than the
8034 // size of the vector that we need to create. We will insert the element
8035 // such that the register coalescer will remove unnecessary copies.
8036 if (VT != Value->getOperand(0).getValueType()) {
8037 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8038 VT.getVectorNumElements();
8039 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8040 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8041 Value, DAG.getConstant(index, dl, MVT::i32)),
8042 DAG.getConstant(index, dl, MVT::i32));
8043 } else
8044 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8045 Value->getOperand(0), Value->getOperand(1));
8046 } else
8047 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8048
8049 if (!usesOnlyOneValue) {
8050 // The dominant value was splatted as 'N', but we now have to insert
8051 // all differing elements.
8052 for (unsigned I = 0; I < NumElts; ++I) {
8053 if (Op.getOperand(I) == Value)
8054 continue;
8055 SmallVector<SDValue, 3> Ops;
8056 Ops.push_back(N);
8057 Ops.push_back(Op.getOperand(I));
8058 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8059 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8060 }
8061 }
8062 return N;
8063 }
8064 if (VT.getVectorElementType().isFloatingPoint()) {
8065 SmallVector<SDValue, 8> Ops;
8066 MVT FVT = VT.getVectorElementType().getSimpleVT();
8067 assert(FVT == MVT::f32 || FVT == MVT::f16);
8068 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8069 for (unsigned i = 0; i < NumElts; ++i)
8070 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8071 Op.getOperand(i)));
8072 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8073 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8074 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8075 if (Val.getNode())
8076 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8077 }
8078 if (usesOnlyOneValue) {
8079 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8080 if (isConstant && Val.getNode())
8081 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8082 }
8083 }
8084
8085 // If all elements are constants and the case above didn't get hit, fall back
8086 // to the default expansion, which will generate a load from the constant
8087 // pool.
8088 if (isConstant)
8089 return SDValue();
8090
8091 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8092 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8093 // length <= 2.
8094 if (NumElts >= 4)
8095 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8096 return shuffle;
8097
8098 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8099 // VCVT's
8100 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8101 return VCVT;
8102 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8103 return VCVT;
8104
8105 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8106 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8107 // into two 64-bit vectors; we might discover a better way to lower it.
8108 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8109 EVT ExtVT = VT.getVectorElementType();
8110 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8111 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8112 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8113 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8114 SDValue Upper =
8115 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8116 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8117 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8118 if (Lower && Upper)
8119 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8120 }
8121
8122 // Vectors with 32- or 64-bit elements can be built by directly assigning
8123 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8124 // will be legalized.
8125 if (EltSize >= 32) {
8126 // Do the expansion with floating-point types, since that is what the VFP
8127 // registers are defined to use, and since i64 is not legal.
8128 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8129 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8130 SmallVector<SDValue, 8> Ops;
8131 for (unsigned i = 0; i < NumElts; ++i)
8132 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8133 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8134 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8135 }
8136
8137 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8138 // know the default expansion would otherwise fall back on something even
8139 // worse. For a vector with one or two non-undef values, that's
8140 // scalar_to_vector for the elements followed by a shuffle (provided the
8141 // shuffle is valid for the target) and materialization element by element
8142 // on the stack followed by a load for everything else.
8143 if (!isConstant && !usesOnlyOneValue) {
8144 SDValue Vec = DAG.getUNDEF(VT);
8145 for (unsigned i = 0 ; i < NumElts; ++i) {
8146 SDValue V = Op.getOperand(i);
8147 if (V.isUndef())
8148 continue;
8149 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8150 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8151 }
8152 return Vec;
8153 }
8154
8155 return SDValue();
8156}
8157
8158// Gather data to see if the operation can be modelled as a
8159// shuffle in combination with VEXTs.
8160SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8161 SelectionDAG &DAG) const {
8162 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8163 SDLoc dl(Op);
8164 EVT VT = Op.getValueType();
8165 unsigned NumElts = VT.getVectorNumElements();
8166
8167 struct ShuffleSourceInfo {
8168 SDValue Vec;
8169 unsigned MinElt = std::numeric_limits<unsigned>::max();
8170 unsigned MaxElt = 0;
8171
8172 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8173 // be compatible with the shuffle we intend to construct. As a result
8174 // ShuffleVec will be some sliding window into the original Vec.
8175 SDValue ShuffleVec;
8176
8177 // Code should guarantee that element i in Vec starts at element "WindowBase
8178 // + i * WindowScale in ShuffleVec".
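// For example, if Vec has i32 elements but the shuffle is built out of i16
// lanes, WindowScale is 2 and element i of Vec covers lanes WindowBase + 2*i
// and WindowBase + 2*i + 1 of ShuffleVec.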
8179 int WindowBase = 0;
8180 int WindowScale = 1;
8181
8182 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8183
8184 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8185 };
8186
8187 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8188 // node.
8189 SmallVector<ShuffleSourceInfo, 2> Sources;
8190 for (unsigned i = 0; i < NumElts; ++i) {
8191 SDValue V = Op.getOperand(i);
8192 if (V.isUndef())
8193 continue;
8194 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8195 // A shuffle can only come from building a vector from various
8196 // elements of other vectors.
8197 return SDValue();
8198 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8199 // Furthermore, shuffles require a constant mask, whereas extractelts
8200 // accept variable indices.
8201 return SDValue();
8202 }
8203
8204 // Add this element source to the list if it's not already there.
8205 SDValue SourceVec = V.getOperand(0);
8206 auto Source = llvm::find(Sources, SourceVec);
8207 if (Source == Sources.end())
8208 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8209
8210 // Update the minimum and maximum lane number seen.
8211 unsigned EltNo = V.getConstantOperandVal(1);
8212 Source->MinElt = std::min(Source->MinElt, EltNo);
8213 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8214 }
8215
8216 // Currently only do something sane when at most two source vectors
8217 // are involved.
8218 if (Sources.size() > 2)
8219 return SDValue();
8220
8221 // Find out the smallest element size among result and two sources, and use
8222 // it as element size to build the shuffle_vector.
8223 EVT SmallestEltTy = VT.getVectorElementType();
8224 for (auto &Source : Sources) {
8225 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8226 if (SrcEltTy.bitsLT(SmallestEltTy))
8227 SmallestEltTy = SrcEltTy;
8228 }
8229 unsigned ResMultiplier =
8230 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8231 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8232 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8233
8234 // If the source vector is too wide or too narrow, we may nevertheless be able
8235 // to construct a compatible shuffle either by concatenating it with UNDEF or
8236 // extracting a suitable range of elements.
8237 for (auto &Src : Sources) {
8238 EVT SrcVT = Src.ShuffleVec.getValueType();
8239
8240 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8241 uint64_t VTSize = VT.getFixedSizeInBits();
8242 if (SrcVTSize == VTSize)
8243 continue;
8244
8245 // This stage of the search produces a source with the same element type as
8246 // the original, but with a total width matching the BUILD_VECTOR output.
8247 EVT EltVT = SrcVT.getVectorElementType();
8248 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8249 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8250
8251 if (SrcVTSize < VTSize) {
8252 if (2 * SrcVTSize != VTSize)
8253 return SDValue();
8254 // We can pad out the smaller vector for free, so if it's part of a
8255 // shuffle...
8256 Src.ShuffleVec =
8257 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8258 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8259 continue;
8260 }
8261
8262 if (SrcVTSize != 2 * VTSize)
8263 return SDValue();
8264
8265 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8266 // Span too large for a VEXT to cope
8267 return SDValue();
8268 }
8269
8270 if (Src.MinElt >= NumSrcElts) {
8271 // The extraction can just take the second half
8272 Src.ShuffleVec =
8273 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8274 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8275 Src.WindowBase = -NumSrcElts;
8276 } else if (Src.MaxElt < NumSrcElts) {
8277 // The extraction can just take the first half
8278 Src.ShuffleVec =
8279 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8280 DAG.getConstant(0, dl, MVT::i32));
8281 } else {
8282 // An actual VEXT is needed
8283 SDValue VEXTSrc1 =
8284 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8285 DAG.getConstant(0, dl, MVT::i32));
8286 SDValue VEXTSrc2 =
8287 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8288 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8289
8290 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8291 VEXTSrc2,
8292 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8293 Src.WindowBase = -Src.MinElt;
8294 }
8295 }
8296
8297 // Another possible incompatibility occurs from the vector element types. We
8298 // can fix this by bitcasting the source vectors to the same type we intend
8299 // for the shuffle.
8300 for (auto &Src : Sources) {
8301 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8302 if (SrcEltTy == SmallestEltTy)
8303 continue;
8304 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8305 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8306 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8307 Src.WindowBase *= Src.WindowScale;
8308 }
8309
8310 // Final check before we try to actually produce a shuffle.
8311 LLVM_DEBUG(for (auto Src
8312 : Sources)
8313 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8314
8315 // The stars all align, our next step is to produce the mask for the shuffle.
8316 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8317 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8318 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8319 SDValue Entry = Op.getOperand(i);
8320 if (Entry.isUndef())
8321 continue;
8322
8323 auto Src = llvm::find(Sources, Entry.getOperand(0));
8324 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8325
8326 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8327 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8328 // segment.
8329 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8330 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8331 VT.getScalarSizeInBits());
8332 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8333
8334 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8335 // starting at the appropriate offset.
8336 int *LaneMask = &Mask[i * ResMultiplier];
8337
8338 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8339 ExtractBase += NumElts * (Src - Sources.begin());
8340 for (int j = 0; j < LanesDefined; ++j)
8341 LaneMask[j] = ExtractBase + j;
8342 }
8343
8344
8345 // We can't handle more than two sources. This should have already
8346 // been checked before this point.
8347 assert(Sources.size() <= 2 && "Too many sources!");
8348
8349 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8350 for (unsigned i = 0; i < Sources.size(); ++i)
8351 ShuffleOps[i] = Sources[i].ShuffleVec;
8352
8353 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8354 ShuffleOps[1], Mask, DAG);
8355 if (!Shuffle)
8356 return SDValue();
8357 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8358}
8359
8360 enum ShuffleOpCodes {
8361 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8362 OP_VREV,
8363 OP_VDUP0,
8364 OP_VDUP1,
8365 OP_VDUP2,
8366 OP_VDUP3,
8367 OP_VEXT1,
8368 OP_VEXT2,
8369 OP_VEXT3,
8370 OP_VUZPL, // VUZP, left result
8371 OP_VUZPR, // VUZP, right result
8372 OP_VZIPL, // VZIP, left result
8373 OP_VZIPR, // VZIP, right result
8374 OP_VTRNL, // VTRN, left result
8375 OP_VTRNR // VTRN, right result
8376};
8377
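// Each PerfectShuffleTable entry packs a 2-bit cost in bits 31-30, one of the
// opcodes above in bits 29-26 and two 13-bit operand indices (LHS and RHS) in
// the low 26 bits; the helpers below simply decode those fields.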
8378static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8379 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8380 switch (OpNum) {
8381 case OP_COPY:
8382 case OP_VREV:
8383 case OP_VDUP0:
8384 case OP_VDUP1:
8385 case OP_VDUP2:
8386 case OP_VDUP3:
8387 return true;
8388 }
8389 return false;
8390}
8391
8392/// isShuffleMaskLegal - Targets can use this to indicate that they only
8393/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8394/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8395/// are assumed to be legal.
8396 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8397 if (VT.getVectorNumElements() == 4 &&
8398 (VT.is128BitVector() || VT.is64BitVector())) {
8399 unsigned PFIndexes[4];
8400 for (unsigned i = 0; i != 4; ++i) {
8401 if (M[i] < 0)
8402 PFIndexes[i] = 8;
8403 else
8404 PFIndexes[i] = M[i];
8405 }
8406
8407 // Compute the index in the perfect shuffle table.
8408 unsigned PFTableIndex =
8409 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8410 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8411 unsigned Cost = (PFEntry >> 30);
8412
8413 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8414 return true;
8415 }
8416
8417 bool ReverseVEXT, isV_UNDEF;
8418 unsigned Imm, WhichResult;
8419
8420 unsigned EltSize = VT.getScalarSizeInBits();
8421 if (EltSize >= 32 ||
8423 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8424 isVREVMask(M, VT, 64) ||
8425 isVREVMask(M, VT, 32) ||
8426 isVREVMask(M, VT, 16))
8427 return true;
8428 else if (Subtarget->hasNEON() &&
8429 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8430 isVTBLMask(M, VT) ||
8431 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8432 return true;
8433 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8434 isReverseMask(M, VT))
8435 return true;
8436 else if (Subtarget->hasMVEIntegerOps() &&
8437 (isVMOVNMask(M, VT, true, false) ||
8438 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8439 return true;
8440 else if (Subtarget->hasMVEIntegerOps() &&
8441 (isTruncMask(M, VT, false, false) ||
8442 isTruncMask(M, VT, false, true) ||
8443 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8444 return true;
8445 else
8446 return false;
8447}
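// Illustrative sketch of the perfect-shuffle table index used above (the helper
// is hypothetical, for explanation only): the 4-element mask is folded into a
// base-9 number, with 8 standing in for an undef lane.
static unsigned getPerfectShuffleIndexForIllustration(ArrayRef<int> M) {
  unsigned Idx = 0;
  for (unsigned i = 0; i != 4; ++i)
    Idx = Idx * 9 + (M[i] < 0 ? 8 : M[i]); // base-9 digits, undef -> 8
  return Idx; // e.g. <1,1,3,3> -> ((1*9+1)*9+3)*9+3 == 840
}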
8448
8449/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8450/// the specified operations to build the shuffle.
8451static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8452 SDValue RHS, SelectionDAG &DAG,
8453 const SDLoc &dl) {
8454 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8455 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8456 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8457
8458 if (OpNum == OP_COPY) {
8459 if (LHSID == (1*9+2)*9+3) return LHS;
8460 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8461 return RHS;
8462 }
8463
8464 SDValue OpLHS, OpRHS;
8465 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8466 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8467 EVT VT = OpLHS.getValueType();
8468
8469 switch (OpNum) {
8470 default: llvm_unreachable("Unknown shuffle opcode!");
8471 case OP_VREV:
8472 // VREV divides the vector in half and swaps within the half.
8473 if (VT.getScalarSizeInBits() == 32)
8474 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8475 // vrev <4 x i16> -> VREV32
8476 if (VT.getScalarSizeInBits() == 16)
8477 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8478 // vrev <4 x i8> -> VREV16
8479 assert(VT.getScalarSizeInBits() == 8);
8480 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8481 case OP_VDUP0:
8482 case OP_VDUP1:
8483 case OP_VDUP2:
8484 case OP_VDUP3:
8485 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8486 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8487 case OP_VEXT1:
8488 case OP_VEXT2:
8489 case OP_VEXT3:
8490 return DAG.getNode(ARMISD::VEXT, dl, VT,
8491 OpLHS, OpRHS,
8492 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8493 case OP_VUZPL:
8494 case OP_VUZPR:
8495 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8496 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8497 case OP_VZIPL:
8498 case OP_VZIPR:
8499 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8500 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8501 case OP_VTRNL:
8502 case OP_VTRNR:
8503 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8504 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8505 }
8506}
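// Illustrative sketch of the PerfectShuffleTable entry layout implied by the
// field extraction above (struct and helper are hypothetical, for explanation
// only): bits [31:30] hold the cost, [29:26] the opcode, [25:13] the LHS table
// id and [12:0] the RHS table id.
struct IllustrativePFEntry {
  unsigned Cost, OpNum, LHSID, RHSID;
};
static IllustrativePFEntry decodePFEntryForIllustration(unsigned PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1 << 13) - 1), PFEntry & ((1 << 13) - 1)};
}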
8507
8508static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8509 ArrayRef<int> ShuffleMask,
8510 SelectionDAG &DAG) {
8511 // Check to see if we can use the VTBL instruction.
8512 SDValue V1 = Op.getOperand(0);
8513 SDValue V2 = Op.getOperand(1);
8514 SDLoc DL(Op);
8515
8516 SmallVector<SDValue, 8> VTBLMask;
8517 for (int I : ShuffleMask)
8518 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8519
8520 if (V2.getNode()->isUndef())
8521 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8522 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8523
8524 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8525 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8526}
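// For example (illustrative only), a v8i8 interleaving mask such as
// <0,8,1,9,2,10,3,11> keeps both inputs and is emitted as a VTBL2 of V1 and V2
// with that mask materialised as the byte-index vector, whereas a mask that
// only references V1 (V2 undef) uses the single-register VTBL1 form.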
8527
8528static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8529 SDLoc DL(Op);
8530 EVT VT = Op.getValueType();
8531
8532 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8533 "Expect an v8i16/v16i8 type");
8534 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8535 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8536 // extract the first 8 bytes into the top double word and the last 8 bytes
8537 // into the bottom double word, through a new vector shuffle that will be
8538 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8539 std::vector<int> NewMask;
8540 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8541 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8542 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8543 NewMask.push_back(i);
8544 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8545}
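// Worked example (illustrative only): reversing a v8i16 with mask
// <7,6,5,4,3,2,1,0>. The VREV64 above yields <3,2,1,0,7,6,5,4>, and the
// follow-up shuffle with NewMask == <4,5,6,7,0,1,2,3> swaps the two halves,
// producing the fully reversed vector.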
8546
8547static EVT getVectorTyFromPredicateVector(EVT VT) {
8548 switch (VT.getSimpleVT().SimpleTy) {
8549 case MVT::v2i1:
8550 return MVT::v2f64;
8551 case MVT::v4i1:
8552 return MVT::v4i32;
8553 case MVT::v8i1:
8554 return MVT::v8i16;
8555 case MVT::v16i1:
8556 return MVT::v16i8;
8557 default:
8558 llvm_unreachable("Unexpected vector predicate type");
8559 }
8560}
8561
8562static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8563 SelectionDAG &DAG) {
8564 // Converting from boolean predicates to integers involves creating a vector
8565 // of all ones or all zeroes and selecting the lanes based upon the real
8566 // predicate.
8567 SDValue AllOnes =
8568 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8569 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8570
8571 SDValue AllZeroes =
8572 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8573 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8574
8575 // Get full vector type from predicate type
8576 EVT NewVT = getVectorTyFromPredicateVector(VT);
8577
8578 SDValue RecastV1;
8579 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8580 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8581 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8582 // since we know in hardware the sizes are really the same.
8583 if (VT != MVT::v16i1)
8584 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8585 else
8586 RecastV1 = Pred;
8587
8588 // Select either all ones or zeroes depending upon the real predicate bits.
8589 SDValue PredAsVector =
8590 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8591
8592 // Recast our new predicate-as-integer v16i8 vector into something
8593 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8594 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8595}
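// For example (illustrative only): a v4i1 predicate occupies four bits of the
// 16-bit hardware predicate per lane, so the PREDICATE_CAST to v16i1 makes each
// original lane cover four v16i1 lanes. The VSELECT then produces four 0xff or
// 0x00 bytes per original lane, and the final bitcast to v4i32 yields lanes of
// 0xffffffff or 0x00000000.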
8596
8597static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8598 const ARMSubtarget *ST) {
8599 EVT VT = Op.getValueType();
8600 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8601 ArrayRef<int> ShuffleMask = SVN->getMask();
8602
8603 assert(ST->hasMVEIntegerOps() &&
8604 "No support for vector shuffle of boolean predicates");
8605
8606 SDValue V1 = Op.getOperand(0);
8607 SDValue V2 = Op.getOperand(1);
8608 SDLoc dl(Op);
8609 if (isReverseMask(ShuffleMask, VT)) {
8610 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8611 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8612 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8613 DAG.getConstant(16, dl, MVT::i32));
8614 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8615 }
8616
8617 // Until we can come up with optimised cases for every single vector
8618 // shuffle in existence we have chosen the least painful strategy. This is
8619 // to essentially promote the boolean predicate to an 8-bit integer, where
8620 // each predicate represents a byte. Then we fall back on a normal integer
8621 // vector shuffle and convert the result back into a predicate vector. In
8622 // many cases the generated code might be even better than scalar code
8623 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8624 // fields in a register into 8 other arbitrary 2-bit fields!
8625 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8626 EVT NewVT = PredAsVector1.getValueType();
8627 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8628 : PromoteMVEPredVector(dl, V2, VT, DAG);
8629 assert(PredAsVector2.getValueType() == NewVT &&
8630 "Expected identical vector type in expanded i1 shuffle!");
8631
8632 // Do the shuffle!
8633 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8634 PredAsVector2, ShuffleMask);
8635
8636 // Now return the result of comparing the shuffled vector with zero,
8637 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8638 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8639 if (VT == MVT::v2i1) {
8640 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8641 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8642 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8643 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8644 }
8645 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8646 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8647}
8648
8649static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8650 ArrayRef<int> ShuffleMask,
8651 SelectionDAG &DAG) {
8652 // Attempt to lower the vector shuffle using as many whole register movs as
8653 // possible. This is useful for types smaller than 32 bits, which would
8654 // often otherwise become a series of GPR movs.
8655 SDLoc dl(Op);
8656 EVT VT = Op.getValueType();
8657 if (VT.getScalarSizeInBits() >= 32)
8658 return SDValue();
8659
8660 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8661 "Unexpected vector type");
8662 int NumElts = VT.getVectorNumElements();
8663 int QuarterSize = NumElts / 4;
8664 // The four final parts of the vector, as i32's
8665 SDValue Parts[4];
8666
8667 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8668 // <u,u,u,u>), returning the vmov lane index
8669 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8670 // Detect which mov lane this would be from the first non-undef element.
8671 int MovIdx = -1;
8672 for (int i = 0; i < Length; i++) {
8673 if (ShuffleMask[Start + i] >= 0) {
8674 if (ShuffleMask[Start + i] % Length != i)
8675 return -1;
8676 MovIdx = ShuffleMask[Start + i] / Length;
8677 break;
8678 }
8679 }
8680 // If all items are undef, leave this for other combines
8681 if (MovIdx == -1)
8682 return -1;
8683 // Check the remaining values are the correct part of the same mov
8684 for (int i = 1; i < Length; i++) {
8685 if (ShuffleMask[Start + i] >= 0 &&
8686 (ShuffleMask[Start + i] / Length != MovIdx ||
8687 ShuffleMask[Start + i] % Length != i))
8688 return -1;
8689 }
8690 return MovIdx;
8691 };
8692
8693 for (int Part = 0; Part < 4; ++Part) {
8694 // Does this part look like a mov
8695 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8696 if (Elt != -1) {
8697 SDValue Input = Op->getOperand(0);
8698 if (Elt >= 4) {
8699 Input = Op->getOperand(1);
8700 Elt -= 4;
8701 }
8702 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8703 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8704 DAG.getConstant(Elt, dl, MVT::i32));
8705 }
8706 }
8707
8708 // Nothing interesting found, just return
8709 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8710 return SDValue();
8711
8712 // The other parts need to be built with the old shuffle vector, cast to a
8713 // v4i32 and extract_vector_elts
8714 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8715 SmallVector<int, 16> NewShuffleMask;
8716 for (int Part = 0; Part < 4; ++Part)
8717 for (int i = 0; i < QuarterSize; i++)
8718 NewShuffleMask.push_back(
8719 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8720 SDValue NewShuffle = DAG.getVectorShuffle(
8721 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8722 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8723
8724 for (int Part = 0; Part < 4; ++Part)
8725 if (!Parts[Part])
8726 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8727 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8728 }
8729 // Build a vector out of the various parts and bitcast it back to the original
8730 // type.
8731 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8732 return DAG.getBitcast(VT, NewVec);
8733}
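// Worked example (illustrative only): for a v8i16 shuffle each "part" is one
// 32-bit lane (QuarterSize == 2). A mask such as <2,3, 1,2, 4,5, 6,7> gives
// getMovIdx results of 1, -1, 2 and 3, so parts 0, 2 and 3 become single f32
// lane extracts of the bitcast first input, and only part 1 is rebuilt through
// the reduced shuffle formed from NewShuffleMask.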
8734
8735static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8736 ArrayRef<int> ShuffleMask,
8737 SelectionDAG &DAG) {
8738 SDValue V1 = Op.getOperand(0);
8739 SDValue V2 = Op.getOperand(1);
8740 EVT VT = Op.getValueType();
8741 unsigned NumElts = VT.getVectorNumElements();
8742
8743 // A one-off identity mask is one that is mostly an identity mask from a
8744 // single source but contains a single element out-of-place, either from a
8745 // different vector or from another position in the same vector. As opposed to
8746 // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8747 // pair directly.
8748 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8749 int &OffElement) {
8750 OffElement = -1;
8751 int NonUndef = 0;
8752 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8753 if (Mask[i] == -1)
8754 continue;
8755 NonUndef++;
8756 if (Mask[i] != i + BaseOffset) {
8757 if (OffElement == -1)
8758 OffElement = i;
8759 else
8760 return false;
8761 }
8762 }
8763 return NonUndef > 2 && OffElement != -1;
8764 };
8765 int OffElement;
8766 SDValue VInput;
8767 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8768 VInput = V1;
8769 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8770 VInput = V2;
8771 else
8772 return SDValue();
8773
8774 SDLoc dl(Op);
8775 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8776 ? MVT::i32
8777 : VT.getScalarType();
8778 SDValue Elt = DAG.getNode(
8779 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8780 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8781 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8782 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8783 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8784}
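// Worked example (illustrative only): for a v4i32 shuffle of V1 and V2 with
// mask <0,1,6,3>, the mask is an identity on V1 except for lane 2, so the code
// above extracts element 6 % 4 == 2 from V2 and inserts it into lane 2 of V1
// instead of emitting a full two-input shuffle.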
8785
8786static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8787 const ARMSubtarget *ST) {
8788 SDValue V1 = Op.getOperand(0);
8789 SDValue V2 = Op.getOperand(1);
8790 SDLoc dl(Op);
8791 EVT VT = Op.getValueType();
8792 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8793 unsigned EltSize = VT.getScalarSizeInBits();
8794
8795 if (ST->hasMVEIntegerOps() && EltSize == 1)
8796 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8797
8798 // Convert shuffles that are directly supported on NEON to target-specific
8799 // DAG nodes, instead of keeping them as shuffles and matching them again
8800 // during code selection. This is more efficient and avoids the possibility
8801 // of inconsistencies between legalization and selection.
8802 // FIXME: floating-point vectors should be canonicalized to integer vectors
8803 // of the same size so that they get CSEd properly.
8804 ArrayRef<int> ShuffleMask = SVN->getMask();
8805
8806 if (EltSize <= 32) {
8807 if (SVN->isSplat()) {
8808 int Lane = SVN->getSplatIndex();
8809 // If this is undef splat, generate it via "just" vdup, if possible.
8810 if (Lane == -1) Lane = 0;
8811
8812 // Test if V1 is a SCALAR_TO_VECTOR.
8813 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8814 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8815 }
8816 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8817 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8818 // reaches it).
8819 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8820 !isa<ConstantSDNode>(V1.getOperand(0))) {
8821 bool IsScalarToVector = true;
8822 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8823 if (!V1.getOperand(i).isUndef()) {
8824 IsScalarToVector = false;
8825 break;
8826 }
8827 if (IsScalarToVector)
8828 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8829 }
8830 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8831 DAG.getConstant(Lane, dl, MVT::i32));
8832 }
8833
8834 bool ReverseVEXT = false;
8835 unsigned Imm = 0;
8836 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8837 if (ReverseVEXT)
8838 std::swap(V1, V2);
8839 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8840 DAG.getConstant(Imm, dl, MVT::i32));
8841 }
8842
8843 if (isVREVMask(ShuffleMask, VT, 64))
8844 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8845 if (isVREVMask(ShuffleMask, VT, 32))
8846 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8847 if (isVREVMask(ShuffleMask, VT, 16))
8848 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8849
8850 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8851 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8852 DAG.getConstant(Imm, dl, MVT::i32));
8853 }
8854
8855 // Check for Neon shuffles that modify both input vectors in place.
8856 // If both results are used, i.e., if there are two shuffles with the same
8857 // source operands and with masks corresponding to both results of one of
8858 // these operations, DAG memoization will ensure that a single node is
8859 // used for both shuffles.
8860 unsigned WhichResult = 0;
8861 bool isV_UNDEF = false;
8862 if (ST->hasNEON()) {
8863 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8864 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8865 if (isV_UNDEF)
8866 V2 = V1;
8867 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8868 .getValue(WhichResult);
8869 }
8870 }
8871 if (ST->hasMVEIntegerOps()) {
8872 if (isVMOVNMask(ShuffleMask, VT, false, false))
8873 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8874 DAG.getConstant(0, dl, MVT::i32));
8875 if (isVMOVNMask(ShuffleMask, VT, true, false))
8876 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8877 DAG.getConstant(1, dl, MVT::i32));
8878 if (isVMOVNMask(ShuffleMask, VT, true, true))
8879 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8880 DAG.getConstant(1, dl, MVT::i32));
8881 }
8882
8883 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8884 // shuffles that produce a result larger than their operands with:
8885 // shuffle(concat(v1, undef), concat(v2, undef))
8886 // ->
8887 // shuffle(concat(v1, v2), undef)
8888 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8889 //
8890 // This is useful in the general case, but there are special cases where
8891 // native shuffles produce larger results: the two-result ops.
8892 //
8893 // Look through the concat when lowering them:
8894 // shuffle(concat(v1, v2), undef)
8895 // ->
8896 // concat(VZIP(v1, v2):0, :1)
8897 //
8898 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8899 SDValue SubV1 = V1->getOperand(0);
8900 SDValue SubV2 = V1->getOperand(1);
8901 EVT SubVT = SubV1.getValueType();
8902
8903 // We expect these to have been canonicalized to -1.
8904 assert(llvm::all_of(ShuffleMask, [&](int i) {
8905 return i < (int)VT.getVectorNumElements();
8906 }) && "Unexpected shuffle index into UNDEF operand!");
8907
8908 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8909 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8910 if (isV_UNDEF)
8911 SubV2 = SubV1;
8912 assert((WhichResult == 0) &&
8913 "In-place shuffle of concat can only have one result!");
8914 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8915 SubV1, SubV2);
8916 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8917 Res.getValue(1));
8918 }
8919 }
8920 }
8921
8922 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8923 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8924 return V;
8925
8926 for (bool Top : {false, true}) {
8927 for (bool SingleSource : {false, true}) {
8928 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8929 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8930 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8931 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8932 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8933 SingleSource ? V1 : V2);
8934 if (Top) {
8935 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8936 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8937 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8938 }
8939 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8940 }
8941 }
8942 }
8943 }
8944
8945 // If the shuffle is not directly supported and it has 4 elements, use
8946 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8947 unsigned NumElts = VT.getVectorNumElements();
8948 if (NumElts == 4) {
8949 unsigned PFIndexes[4];
8950 for (unsigned i = 0; i != 4; ++i) {
8951 if (ShuffleMask[i] < 0)
8952 PFIndexes[i] = 8;
8953 else
8954 PFIndexes[i] = ShuffleMask[i];
8955 }
8956
8957 // Compute the index in the perfect shuffle table.
8958 unsigned PFTableIndex =
8959 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8960 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8961 unsigned Cost = (PFEntry >> 30);
8962
8963 if (Cost <= 4) {
8964 if (ST->hasNEON())
8965 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8966 else if (isLegalMVEShuffleOp(PFEntry)) {
8967 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8968 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8969 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8970 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8971 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8972 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8973 }
8974 }
8975 }
8976
8977 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8978 if (EltSize >= 32) {
8979 // Do the expansion with floating-point types, since that is what the VFP
8980 // registers are defined to use, and since i64 is not legal.
8981 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8982 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8983 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8984 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8985 SmallVector<SDValue, 8> Ops;
8986 for (unsigned i = 0; i < NumElts; ++i) {
8987 if (ShuffleMask[i] < 0)
8988 Ops.push_back(DAG.getUNDEF(EltVT));
8989 else
8990 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8991 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8992 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8993 dl, MVT::i32)));
8994 }
8995 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8996 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8997 }
8998
8999 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9000 isReverseMask(ShuffleMask, VT))
9001 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9002
9003 if (ST->hasNEON() && VT == MVT::v8i8)
9004 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9005 return NewOp;
9006
9007 if (ST->hasMVEIntegerOps())
9008 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9009 return NewOp;
9010
9011 return SDValue();
9012}
9013
9014static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9015 const ARMSubtarget *ST) {
9016 EVT VecVT = Op.getOperand(0).getValueType();
9017 SDLoc dl(Op);
9018
9019 assert(ST->hasMVEIntegerOps() &&
9020 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9021
9022 SDValue Conv =
9023 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9024 unsigned Lane = Op.getConstantOperandVal(2);
9025 unsigned LaneWidth =
9026 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9027 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9028 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9029 Op.getOperand(1), DAG.getValueType(MVT::i1));
9030 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9031 DAG.getConstant(~Mask, dl, MVT::i32));
9032 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9033}
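// Illustrative scalar model of the predicate-bit insertion above (hypothetical
// helper, for explanation only): the predicate lives in a 32-bit value with
// LaneWidth bits per lane, the new i1 is widened to all-ones or all-zeros, and
// a bitfield insert merges it under the lane's mask.
static unsigned insertPredicateLaneForIllustration(unsigned Pred, bool Val,
                                                   unsigned Lane,
                                                   unsigned LaneWidth) {
  unsigned Mask = ((1u << LaneWidth) - 1) << (Lane * LaneWidth);
  return (Pred & ~Mask) | (Val ? Mask : 0u);
}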
9034
9035SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9036 SelectionDAG &DAG) const {
9037 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9038 SDValue Lane = Op.getOperand(2);
9039 if (!isa<ConstantSDNode>(Lane))
9040 return SDValue();
9041
9042 SDValue Elt = Op.getOperand(1);
9043 EVT EltVT = Elt.getValueType();
9044
9045 if (Subtarget->hasMVEIntegerOps() &&
9046 Op.getValueType().getScalarSizeInBits() == 1)
9047 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9048
9049 if (getTypeAction(*DAG.getContext(), EltVT) ==
9050 TargetLowering::TypePromoteFloat) {
9051 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9052 // but the type system will try to do that if we don't intervene.
9053 // Reinterpret any such vector-element insertion as one with the
9054 // corresponding integer types.
9055
9056 SDLoc dl(Op);
9057
9058 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9059 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9060 TargetLowering::TypePromoteFloat);
9061
9062 SDValue VecIn = Op.getOperand(0);
9063 EVT VecVT = VecIn.getValueType();
9064 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9065 VecVT.getVectorNumElements());
9066
9067 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9068 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9069 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9070 IVecIn, IElt, Lane);
9071 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9072 }
9073
9074 return Op;
9075}
9076
9077static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9078 const ARMSubtarget *ST) {
9079 EVT VecVT = Op.getOperand(0).getValueType();
9080 SDLoc dl(Op);
9081
9082 assert(ST->hasMVEIntegerOps() &&
9083 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9084
9085 SDValue Conv =
9086 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9087 unsigned Lane = Op.getConstantOperandVal(1);
9088 unsigned LaneWidth =
9089 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9090 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9091 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9092 return Shift;
9093}
9094
9095static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9096 const ARMSubtarget *ST) {
9097 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9098 SDValue Lane = Op.getOperand(1);
9099 if (!isa<ConstantSDNode>(Lane))
9100 return SDValue();
9101
9102 SDValue Vec = Op.getOperand(0);
9103 EVT VT = Vec.getValueType();
9104
9105 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9106 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9107
9108 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9109 SDLoc dl(Op);
9110 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9111 }
9112
9113 return Op;
9114}
9115
9116static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9117 const ARMSubtarget *ST) {
9118 SDLoc dl(Op);
9119 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9120 "Unexpected custom CONCAT_VECTORS lowering");
9122 "Unexpected custom CONCAT_VECTORS lowering");
9123 assert(ST->hasMVEIntegerOps() &&
9124 "CONCAT_VECTORS lowering only supported for MVE");
9125
9126 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9127 EVT Op1VT = V1.getValueType();
9128 EVT Op2VT = V2.getValueType();
9129 assert(Op1VT == Op2VT && "Operand types don't match!");
9130 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9131 "Unexpected i1 concat operations!");
9132 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9133
9134 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9135 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9136
9137 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9138 // promoted to v8i16, etc.
9139 MVT ElType =
9140 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9141 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9142
9143 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9144 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9145 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9146 // ConcatVT.
9147 SDValue ConVec =
9148 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9149 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9150 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9151 }
9152
9153 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9154 // to be the right size for the destination. For example, if Op1 is v4i1
9155 // then the promoted vector is v4i32. The result of concatenation gives a
9156 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9157 // needs truncating to i16 and inserting in the result.
9158 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9159 EVT NewVT = NewV.getValueType();
9160 EVT ConcatVT = ConVec.getValueType();
9161 unsigned ExtScale = 1;
9162 if (NewVT == MVT::v2f64) {
9163 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9164 ExtScale = 2;
9165 }
9166 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9167 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9168 DAG.getIntPtrConstant(i * ExtScale, dl));
9169 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9170 DAG.getConstant(j, dl, MVT::i32));
9171 }
9172 return ConVec;
9173 };
9174 unsigned j = 0;
9175 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9176 ConVec = ExtractInto(NewV1, ConVec, j);
9177 ConVec = ExtractInto(NewV2, ConVec, j);
9178
9179 // Now return the result of comparing the subvector with zero, which will
9180 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9181 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9182 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9183 };
9184
9185 // Concat each pair of subvectors and pack into the lower half of the array.
9186 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9187 while (ConcatOps.size() > 1) {
9188 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9189 SDValue V1 = ConcatOps[I];
9190 SDValue V2 = ConcatOps[I + 1];
9191 ConcatOps[I / 2] = ConcatPair(V1, V2);
9192 }
9193 ConcatOps.resize(ConcatOps.size() / 2);
9194 }
9195 return ConcatOps[0];
9196}
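// For example (illustrative only), concatenating two v4i1 predicates promotes
// each operand to a v4i32 of all-ones/all-zero lanes, MVETRUNC packs the two
// v4i32 halves into a single v8i16, and the VCMPZ-against-zero (NE) converts
// that back into the v8i1 result predicate.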
9197
9198static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9199 const ARMSubtarget *ST) {
9200 EVT VT = Op->getValueType(0);
9201 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9202 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9203
9204 // The only time a CONCAT_VECTORS operation can have legal types is when
9205 // two 64-bit vectors are concatenated to a 128-bit vector.
9206 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9207 "unexpected CONCAT_VECTORS");
9208 SDLoc dl(Op);
9209 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9210 SDValue Op0 = Op.getOperand(0);
9211 SDValue Op1 = Op.getOperand(1);
9212 if (!Op0.isUndef())
9213 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9214 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9215 DAG.getIntPtrConstant(0, dl));
9216 if (!Op1.isUndef())
9217 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9218 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9219 DAG.getIntPtrConstant(1, dl));
9220 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9221}
9222
9223static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9224 const ARMSubtarget *ST) {
9225 SDValue V1 = Op.getOperand(0);
9226 SDValue V2 = Op.getOperand(1);
9227 SDLoc dl(Op);
9228 EVT VT = Op.getValueType();
9229 EVT Op1VT = V1.getValueType();
9230 unsigned NumElts = VT.getVectorNumElements();
9231 unsigned Index = V2->getAsZExtVal();
9232
9233 assert(VT.getScalarSizeInBits() == 1 &&
9234 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9235 assert(ST->hasMVEIntegerOps() &&
9236 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9237
9238 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9239
9240 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9241 // promoted to v8i16, etc.
9242
9243 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9244
9245 if (NumElts == 2) {
9246 EVT SubVT = MVT::v4i32;
9247 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9248 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9249 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9250 DAG.getIntPtrConstant(i, dl));
9251 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9252 DAG.getConstant(j, dl, MVT::i32));
9253 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9254 DAG.getConstant(j + 1, dl, MVT::i32));
9255 }
9256 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9257 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9258 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9259 }
9260
9261 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9262 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9263 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9264 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9265 DAG.getIntPtrConstant(i, dl));
9266 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9267 DAG.getConstant(j, dl, MVT::i32));
9268 }
9269
9270 // Now return the result of comparing the subvector with zero,
9271 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9272 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9273 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9274}
9275
9276// Turn a truncate to a predicate (an i1 vector) into icmp(and(x, 1), 0).
9277static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9278 const ARMSubtarget *ST) {
9279 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9280 EVT VT = N->getValueType(0);
9281 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9282 "Expected a vector i1 type!");
9283 SDValue Op = N->getOperand(0);
9284 EVT FromVT = Op.getValueType();
9285 SDLoc DL(N);
9286
9287 SDValue And =
9288 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9289 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9290 DAG.getCondCode(ISD::SETNE));
9291}
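// For example (illustrative only), truncating a v4i32 %x to v4i1 becomes
// setcc(and(%x, splat(1)), splat(0), ne): each lane's low bit is tested rather
// than materialising an actual vector truncate.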
9292
9293static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9294 const ARMSubtarget *Subtarget) {
9295 if (!Subtarget->hasMVEIntegerOps())
9296 return SDValue();
9297
9298 EVT ToVT = N->getValueType(0);
9299 if (ToVT.getScalarType() == MVT::i1)
9300 return LowerTruncatei1(N, DAG, Subtarget);
9301
9302 // MVE does not have a single instruction to perform the truncation of a v4i32
9303 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9304 // Most of the instructions in MVE follow the 'Beats' system, where moving
9305 // values from different lanes is usually something that the instructions
9306 // avoid.
9307 //
9308 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9309 // which take the top/bottom half of a larger lane and extend it (or do the
9310 // opposite, truncating into the top/bottom lane from a larger lane). Note
9311 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9312 // bottom 16bits from each vector lane. This works really well with T/B
9313 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9314 // to be reordered.
9315 //
9316 // But truncates and sext/zext are always going to be fairly common from llvm.
9317 // We have several options for how to deal with them:
9318 // - Wherever possible combine them into an instruction that makes them
9319 // "free". This includes loads/stores, which can perform the trunc as part
9320 // of the memory operation. Or certain shuffles that can be turned into
9321 // VMOVN/VMOVL.
9322 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9323 // trunc(mul(sext(a), sext(b))) may become
9324 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9325 // this case can use VMULL). This is performed in the
9326 // MVELaneInterleavingPass.
9327 // - Otherwise we have an option. By default we would expand the
9328 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9329 // registers. One for each vector lane in the vector. This can obviously be
9330 // very expensive.
9331 // - The other option is to use the fact that loads/store can extend/truncate
9332 // to turn a trunc into two truncating stack stores and a stack reload. This
9333 // becomes 3 back-to-back memory operations, but at least that is less than
9334 // all the insert/extracts.
9335 //
9336 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9337 // are either optimized where they can be, or eventually lowered into stack
9338 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9340 // too early, where other instructions would be better, and stops us from
9340 // having to reconstruct multiple buildvector shuffles into loads/stores.
9341 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9342 return SDValue();
9343 EVT FromVT = N->getOperand(0).getValueType();
9344 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9345 return SDValue();
9346
9347 SDValue Lo, Hi;
9348 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9349 SDLoc DL(N);
9350 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9351}
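// For example (illustrative only), a trunc of v8i32 to v8i16 is split into its
// two v4i32 halves and emitted as a single MVETRUNC(lo, hi) node, which is
// later either optimised where possible or expanded through the stack, as
// described in the comment above.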
9352
9353static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9354 const ARMSubtarget *Subtarget) {
9355 if (!Subtarget->hasMVEIntegerOps())
9356 return SDValue();
9357
9358 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9359
9360 EVT ToVT = N->getValueType(0);
9361 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9362 return SDValue();
9363 SDValue Op = N->getOperand(0);
9364 EVT FromVT = Op.getValueType();
9365 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9366 return SDValue();
9367
9368 SDLoc DL(N);
9369 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9370 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9371 ExtVT = MVT::v8i16;
9372
9373 unsigned Opcode =
9374 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9375 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9376 SDValue Ext1 = Ext.getValue(1);
9377
9378 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9379 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9380 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9381 }
9382
9383 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9384}
9385
9386/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9387/// element has been zero/sign-extended, depending on the isSigned parameter,
9388/// from an integer type half its size.
9389static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9390 bool isSigned) {
9391 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9392 EVT VT = N->getValueType(0);
9393 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9394 SDNode *BVN = N->getOperand(0).getNode();
9395 if (BVN->getValueType(0) != MVT::v4i32 ||
9396 BVN->getOpcode() != ISD::BUILD_VECTOR)
9397 return false;
9398 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9399 unsigned HiElt = 1 - LoElt;
9400 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9401 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9402 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9403 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9404 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9405 return false;
9406 if (isSigned) {
9407 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9408 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9409 return true;
9410 } else {
9411 if (Hi0->isZero() && Hi1->isZero())
9412 return true;
9413 }
9414 return false;
9415 }
9416
9417 if (N->getOpcode() != ISD::BUILD_VECTOR)
9418 return false;
9419
9420 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9421 SDNode *Elt = N->getOperand(i).getNode();
9422 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9423 unsigned EltSize = VT.getScalarSizeInBits();
9424 unsigned HalfSize = EltSize / 2;
9425 if (isSigned) {
9426 if (!isIntN(HalfSize, C->getSExtValue()))
9427 return false;
9428 } else {
9429 if (!isUIntN(HalfSize, C->getZExtValue()))
9430 return false;
9431 }
9432 continue;
9433 }
9434 return false;
9435 }
9436
9437 return true;
9438}
9439
9440/// isSignExtended - Check if a node is a vector value that is sign-extended
9441/// or a constant BUILD_VECTOR with sign-extended elements.
9442static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9443 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9444 return true;
9445 if (isExtendedBUILD_VECTOR(N, DAG, true))
9446 return true;
9447 return false;
9448}
9449
9450/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9451/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9452static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9453 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9454 ISD::isZEXTLoad(N))
9455 return true;
9456 if (isExtendedBUILD_VECTOR(N, DAG, false))
9457 return true;
9458 return false;
9459}
9460
9461static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9462 if (OrigVT.getSizeInBits() >= 64)
9463 return OrigVT;
9464
9465 assert(OrigVT.isSimple() && "Expecting a simple value type");
9466
9467 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9468 switch (OrigSimpleTy) {
9469 default: llvm_unreachable("Unexpected Vector Type");
9470 case MVT::v2i8:
9471 case MVT::v2i16:
9472 return MVT::v2i32;
9473 case MVT::v4i8:
9474 return MVT::v4i16;
9475 }
9476}
9477
9478/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9479/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9480/// We insert the required extension here to get the vector to fill a D register.
9481static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9482 const EVT &OrigTy,
9483 const EVT &ExtTy,
9484 unsigned ExtOpcode) {
9485 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9486 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9487 // 64-bits we need to insert a new extension so that it will be 64-bits.
9488 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9489 if (OrigTy.getSizeInBits() >= 64)
9490 return N;
9491
9492 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9493 EVT NewVT = getExtensionTo64Bits(OrigTy);
9494
9495 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9496}
9497
9498/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9499/// does not do any sign/zero extension. If the original vector is less
9500/// than 64 bits, an appropriate extension will be added after the load to
9501/// reach a total size of 64 bits. We have to add the extension separately
9502/// because ARM does not have a sign/zero extending load for vectors.
9503static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9504 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9505
9506 // The load already has the right type.
9507 if (ExtendedTy == LD->getMemoryVT())
9508 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9509 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9510 LD->getMemOperand()->getFlags());
9511
9512 // We need to create a zextload/sextload. We cannot just create a load
9513 // followed by a zext/sext node because LowerMUL is also run during normal
9514 // operation legalization where we can't create illegal types.
9515 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9516 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9517 LD->getMemoryVT(), LD->getAlign(),
9518 LD->getMemOperand()->getFlags());
9519}
9520
9521/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9522/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9523/// the unextended value. The unextended vector should be 64 bits so that it can
9524/// be used as an operand to a VMULL instruction. If the original vector size
9525/// before extension is less than 64 bits we add an extension to resize
9526/// the vector to 64 bits.
9527static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9528 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9529 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9530 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9531 N->getOperand(0)->getValueType(0),
9532 N->getValueType(0),
9533 N->getOpcode());
9534
9535 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9536 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9537 "Expected extending load");
9538
9539 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9540 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9541 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9542 SDValue extLoad =
9543 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9544 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9545
9546 return newLoad;
9547 }
9548
9549 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9550 // have been legalized as a BITCAST from v4i32.
9551 if (N->getOpcode() == ISD::BITCAST) {
9552 SDNode *BVN = N->getOperand(0).getNode();
9553 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9554 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9555 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9556 return DAG.getBuildVector(
9557 MVT::v2i32, SDLoc(N),
9558 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9559 }
9560 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9561 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9562 EVT VT = N->getValueType(0);
9563 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9564 unsigned NumElts = VT.getVectorNumElements();
9565 MVT TruncVT = MVT::getIntegerVT(EltSize);
9566 SmallVector<SDValue, 8> Ops;
9567 SDLoc dl(N);
9568 for (unsigned i = 0; i != NumElts; ++i) {
9569 const APInt &CInt = N->getConstantOperandAPInt(i);
9570 // Element types smaller than 32 bits are not legal, so use i32 elements.
9571 // The values are implicitly truncated so sext vs. zext doesn't matter.
9572 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9573 }
9574 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9575}
9576
9577static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9578 unsigned Opcode = N->getOpcode();
9579 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9580 SDNode *N0 = N->getOperand(0).getNode();
9581 SDNode *N1 = N->getOperand(1).getNode();
9582 return N0->hasOneUse() && N1->hasOneUse() &&
9583 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9584 }
9585 return false;
9586}
9587
9588static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9589 unsigned Opcode = N->getOpcode();
9590 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9591 SDNode *N0 = N->getOperand(0).getNode();
9592 SDNode *N1 = N->getOperand(1).getNode();
9593 return N0->hasOneUse() && N1->hasOneUse() &&
9594 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9595 }
9596 return false;
9597}
9598
9599static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9600 // Multiplications are only custom-lowered for 128-bit vectors so that
9601 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9602 EVT VT = Op.getValueType();
9603 assert(VT.is128BitVector() && VT.isInteger() &&
9604 "unexpected type for custom-lowering ISD::MUL");
9605 SDNode *N0 = Op.getOperand(0).getNode();
9606 SDNode *N1 = Op.getOperand(1).getNode();
9607 unsigned NewOpc = 0;
9608 bool isMLA = false;
9609 bool isN0SExt = isSignExtended(N0, DAG);
9610 bool isN1SExt = isSignExtended(N1, DAG);
9611 if (isN0SExt && isN1SExt)
9612 NewOpc = ARMISD::VMULLs;
9613 else {
9614 bool isN0ZExt = isZeroExtended(N0, DAG);
9615 bool isN1ZExt = isZeroExtended(N1, DAG);
9616 if (isN0ZExt && isN1ZExt)
9617 NewOpc = ARMISD::VMULLu;
9618 else if (isN1SExt || isN1ZExt) {
9619 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9620 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9621 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9622 NewOpc = ARMISD::VMULLs;
9623 isMLA = true;
9624 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9625 NewOpc = ARMISD::VMULLu;
9626 isMLA = true;
9627 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9628 std::swap(N0, N1);
9629 NewOpc = ARMISD::VMULLu;
9630 isMLA = true;
9631 }
9632 }
9633
9634 if (!NewOpc) {
9635 if (VT == MVT::v2i64)
9636 // Fall through to expand this. It is not legal.
9637 return SDValue();
9638 else
9639 // Other vector multiplications are legal.
9640 return Op;
9641 }
9642 }
9643
9644 // Legalize to a VMULL instruction.
9645 SDLoc DL(Op);
9646 SDValue Op0;
9647 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9648 if (!isMLA) {
9649 Op0 = SkipExtensionForVMULL(N0, DAG);
9650 assert(Op0.getValueType().is64BitVector() &&
9651 Op1.getValueType().is64BitVector() &&
9652 "unexpected types for extended operands to VMULL");
9653 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9654 }
9655
9656 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9657 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9658 // vmull q0, d4, d6
9659 // vmlal q0, d5, d6
9660 // is faster than
9661 // vaddl q0, d4, d5
9662 // vmovl q1, d6
9663 // vmul q0, q0, q1
9664 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9665 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9666 EVT Op1VT = Op1.getValueType();
9667 return DAG.getNode(N0->getOpcode(), DL, VT,
9668 DAG.getNode(NewOpc, DL, VT,
9669 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9670 DAG.getNode(NewOpc, DL, VT,
9671 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9672}
9673
9674static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9675 SelectionDAG &DAG) {
9676 // TODO: Should this propagate fast-math-flags?
9677
9678 // Convert to float
9679 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9680 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9681 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9682 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9683 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9684 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9685 // Get reciprocal estimate.
9686 // float4 recip = vrecpeq_f32(yf);
9687 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9688 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9689 Y);
9690 // Because char has a smaller range than uchar, we can actually get away
9691 // without any newton steps. This requires that we use a weird bias
9692 // of 0xb000, however (again, this has been exhaustively tested).
9693 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9694 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9695 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9696 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9697 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9698 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9699 // Convert back to short.
9700 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9701 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9702 return X;
9703}
9704
9705static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9706 SelectionDAG &DAG) {
9707 // TODO: Should this propagate fast-math-flags?
9708
9709 SDValue N2;
9710 // Convert to float.
9711 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9712 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9713 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9714 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9715 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9716 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9717
9718 // Use reciprocal estimate and one refinement step.
9719 // float4 recip = vrecpeq_f32(yf);
9720 // recip *= vrecpsq_f32(yf, recip);
9721 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9722 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9723 N1);
9724 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9725 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9726 N1, N2);
9727 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9728 // Because short has a smaller range than ushort, we can actually get away
9729 // with only a single newton step. This requires that we use a weird bias
9730 // of 0x89, however (again, this has been exhaustively tested).
9731 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9732 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9733 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9734 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9735 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9736 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9737 // Convert back to integer and return.
9738 // return vmovn_s32(vcvt_s32_f32(result));
9739 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9740 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9741 return N0;
9742}
9743
9744static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9745 const ARMSubtarget *ST) {
9746 EVT VT = Op.getValueType();
9747 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9748 "unexpected type for custom-lowering ISD::SDIV");
9749
9750 SDLoc dl(Op);
9751 SDValue N0 = Op.getOperand(0);
9752 SDValue N1 = Op.getOperand(1);
9753 SDValue N2, N3;
9754
9755 if (VT == MVT::v8i8) {
9756 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9757 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9758
9759 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9760 DAG.getIntPtrConstant(4, dl));
9761 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9762 DAG.getIntPtrConstant(4, dl));
9763 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9764 DAG.getIntPtrConstant(0, dl));
9765 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9766 DAG.getIntPtrConstant(0, dl));
9767
9768 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9769 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9770
9771 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9772 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9773
9774 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9775 return N0;
9776 }
9777 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9778}
9779
9780static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9781 const ARMSubtarget *ST) {
9782 // TODO: Should this propagate fast-math-flags?
9783 EVT VT = Op.getValueType();
9784 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9785 "unexpected type for custom-lowering ISD::UDIV");
9786
9787 SDLoc dl(Op);
9788 SDValue N0 = Op.getOperand(0);
9789 SDValue N1 = Op.getOperand(1);
9790 SDValue N2, N3;
9791
9792 if (VT == MVT::v8i8) {
9793 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9794 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9795
9796 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9797 DAG.getIntPtrConstant(4, dl));
9798 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9799 DAG.getIntPtrConstant(4, dl));
9800 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9801 DAG.getIntPtrConstant(0, dl));
9802 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9803 DAG.getIntPtrConstant(0, dl));
9804
9805 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9806 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9807
9808 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9809 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9810
9811 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9812 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9813 MVT::i32),
9814 N0);
9815 return N0;
9816 }
9817
9818 // v4i16 sdiv ... Convert to float.
9819 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9820 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9821 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9822 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9823 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9824 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9825
9826 // Use reciprocal estimate and two refinement steps.
9827 // float4 recip = vrecpeq_f32(yf);
9828 // recip *= vrecpsq_f32(yf, recip);
9829 // recip *= vrecpsq_f32(yf, recip);
9830 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9831 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9832 BN1);
9833 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9834 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9835 BN1, N2);
9836 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9837 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9838 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9839 BN1, N2);
9840 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9841 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9842 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9843 // and that it will never cause us to return an answer too large).
9844 // float4 result = as_float4(as_int4(xf*recip) + 2);
9845 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9846 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9847 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9848 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9849 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9850 // Convert back to integer and return.
9851 // return vmovn_u32(vcvt_s32_f32(result));
9852 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9853 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9854 return N0;
9855}
9856
9857static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9858 SDNode *N = Op.getNode();
9859 EVT VT = N->getValueType(0);
9860 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9861
9862 SDValue Carry = Op.getOperand(2);
9863
9864 SDLoc DL(Op);
9865
9866 SDValue Result;
9867 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9868 // This converts the boolean value carry into the carry flag.
9869 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9870
9871 // Do the addition proper using the carry flag we wanted.
9872 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9873 Op.getOperand(1), Carry);
9874
9875 // Now convert the carry flag into a boolean value.
9876 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9877 } else {
9878 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
9879 // have to invert the carry first.
9880 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9881 DAG.getConstant(1, DL, MVT::i32), Carry);
9882 // This converts the boolean value carry into the carry flag.
9883 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9884
9885 // Do the subtraction proper using the carry flag we wanted.
9886 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9887 Op.getOperand(1), Carry);
9888
9889 // Now convert the carry flag into a boolean value.
9890 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9891 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9892 // by ISD::USUBO_CARRY, so compute 1 - C.
9893 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9894 DAG.getConstant(1, DL, MVT::i32), Carry);
9895 }
9896
9897 // Return both values.
9898 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9899}
9900
9901SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9902 assert(Subtarget->isTargetDarwin());
9903
9904 // For iOS, we want to call an alternative entry point: __sincos_stret,
9905 // which returns the sin/cos pair via sret.
9906 SDLoc dl(Op);
9907 SDValue Arg = Op.getOperand(0);
9908 EVT ArgVT = Arg.getValueType();
9909 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9910 auto PtrVT = getPointerTy(DAG.getDataLayout());
9911
9912 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9913 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9914
9915 // Pair of floats / doubles used to pass the result.
9916 Type *RetTy = StructType::get(ArgTy, ArgTy);
9917 auto &DL = DAG.getDataLayout();
9918
9919 ArgListTy Args;
9920 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9921 SDValue SRet;
9922 if (ShouldUseSRet) {
9923 // Create stack object for sret.
9924 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9925 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9926 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9927 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9928
9929 ArgListEntry Entry;
9930 Entry.Node = SRet;
9931 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9932 Entry.IsSExt = false;
9933 Entry.IsZExt = false;
9934 Entry.IsSRet = true;
9935 Args.push_back(Entry);
9936 RetTy = Type::getVoidTy(*DAG.getContext());
9937 }
9938
9939 ArgListEntry Entry;
9940 Entry.Node = Arg;
9941 Entry.Ty = ArgTy;
9942 Entry.IsSExt = false;
9943 Entry.IsZExt = false;
9944 Args.push_back(Entry);
9945
9946 RTLIB::Libcall LC =
9947 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9948 const char *LibcallName = getLibcallName(LC);
9949 CallingConv::ID CC = getLibcallCallingConv(LC);
9950 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9951
9953 CLI.setDebugLoc(dl)
9954 .setChain(DAG.getEntryNode())
9955 .setCallee(CC, RetTy, Callee, std::move(Args))
9956 .setDiscardResult(ShouldUseSRet);
9957 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9958
9959 if (!ShouldUseSRet)
9960 return CallResult.first;
9961
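 // The sret slot now holds the {sin, cos} pair: sin at offset 0 and cos
 // immediately after it (ArgVT.getStoreSize() bytes in). Load both halves
 // and merge them into a single node below.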
9962 SDValue LoadSin =
9963 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9964
9965 // Address of cos field.
9966 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9967 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9968 SDValue LoadCos =
9969 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9970
9971 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9972 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9973 LoadSin.getValue(0), LoadCos.getValue(0));
9974}
9975
9976SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9977 bool Signed,
9978 SDValue &Chain) const {
9979 EVT VT = Op.getValueType();
9980 assert((VT == MVT::i32 || VT == MVT::i64) &&
9981 "unexpected type for custom lowering DIV");
9982 SDLoc dl(Op);
9983
9984 const auto &DL = DAG.getDataLayout();
9985 const auto &TLI = DAG.getTargetLoweringInfo();
9986
9987 const char *Name = nullptr;
9988 if (Signed)
9989 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9990 else
9991 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9992
9993 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
9994
9995 ARMTargetLowering::ArgListTy Args;
9996
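 // Note the reversed operand order {1, 0}: the __rt_*div helpers take the
 // divisor as their first argument and the dividend as their second.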
9997 for (auto AI : {1, 0}) {
9998 ArgListEntry Arg;
9999 Arg.Node = Op.getOperand(AI);
10000 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10001 Args.push_back(Arg);
10002 }
10003
10004 CallLoweringInfo CLI(DAG);
10005 CLI.setDebugLoc(dl)
10006 .setChain(Chain)
10007 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
10008 ES, std::move(Args));
10009
10010 return LowerCallTo(CLI).first;
10011}
10012
10013// This is a code size optimisation: return the original SDIV node to
10014// DAGCombiner when we don't want to expand SDIV into a sequence of
10015// instructions, and return an empty SDValue otherwise, which causes the
10016// SDIV to be expanded in DAGCombine.
10017SDValue
10018ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10019 SelectionDAG &DAG,
10020 SmallVectorImpl<SDNode *> &Created) const {
10021 // TODO: Support SREM
10022 if (N->getOpcode() != ISD::SDIV)
10023 return SDValue();
10024
10025 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10026 const bool MinSize = ST.hasMinSize();
10027 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10028 : ST.hasDivideInARMMode();
10029
10030 // Don't touch vector types; rewriting this may lead to scalarizing
10031 // the int divs.
10032 if (N->getOperand(0).getValueType().isVector())
10033 return SDValue();
10034
10035 // Bail if MinSize is not set; for both ARM and Thumb mode we also need
10036 // hwdiv support for this to be really profitable.
10037 if (!(MinSize && HasDivide))
10038 return SDValue();
10039
10040 // ARM mode is a bit simpler than Thumb: we can handle large power
10041 // of 2 immediates with 1 mov instruction; no further checks required,
10042 // just return the sdiv node.
10043 if (!ST.isThumb())
10044 return SDValue(N, 0);
10045
10046 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10047 // and thus lose the code size benefits of a MOVS that requires only 2.
10048 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10049 // but as it's doing exactly this, it's not worth the trouble to get TTI.
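 // For example, a divisor of 64 still fits a 2-byte MOVS and the SDIV is
 // kept, while a divisor of 256 would need a 4-byte immediate move and is
 // instead left to the generic shift-based expansion.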
10050 if (Divisor.sgt(128))
10051 return SDValue();
10052
10053 return SDValue(N, 0);
10054}
10055
10056SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10057 bool Signed) const {
10058 assert(Op.getValueType() == MVT::i32 &&
10059 "unexpected type for custom lowering DIV");
10060 SDLoc dl(Op);
10061
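 // Check the divisor before making the call: WIN__DBZCHK later expands to a
 // test that raises the integer divide-by-zero trap when the divisor is zero.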
10062 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10063 DAG.getEntryNode(), Op.getOperand(1));
10064
10065 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10066}
10067
10068static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10069 SDLoc DL(N);
10070 SDValue Op = N->getOperand(1);
10071 if (N->getValueType(0) == MVT::i32)
10072 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10073 SDValue Lo, Hi;
10074 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10075 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10076 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10077}
10078
10079void ARMTargetLowering::ExpandDIV_Windows(
10080 SDValue Op, SelectionDAG &DAG, bool Signed,
10081 SmallVectorImpl<SDValue> &Results) const {
10082 const auto &DL = DAG.getDataLayout();
10083 const auto &TLI = DAG.getTargetLoweringInfo();
10084
10085 assert(Op.getValueType() == MVT::i64 &&
10086 "unexpected type for custom lowering DIV");
10087 SDLoc dl(Op);
10088
10089 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10090
10091 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10092
10093 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10094 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10095 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10096 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10097
10098 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10099}
10100
10101static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10102 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10103 EVT MemVT = LD->getMemoryVT();
10104 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10105 MemVT == MVT::v16i1) &&
10106 "Expected a predicate type!");
10107 assert(MemVT == Op.getValueType());
10108 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10109 "Expected a non-extending load");
10110 assert(LD->isUnindexed() && "Expected an unindexed load");
10111
10112 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
10113 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10114 // need to make sure that 8/4/2 bits are actually loaded into the correct
10115 // place, which means loading the value and then shuffling the values into
10116 // the bottom bits of the predicate.
10117 // Equally, a VLDR for a v16i1 will actually load 32 bits (so it would be
10118 // incorrect for BE).
10119 // For BE, the rest of llvm apparently assumes the reverse order of a natural
10120 // VMSR(load), so the loaded value needs to be reversed.
10121
10122 SDLoc dl(Op);
10123 SDValue Load = DAG.getExtLoad(
10124 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10125 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10126 LD->getMemOperand());
10127 SDValue Val = Load;
10128 if (DAG.getDataLayout().isBigEndian())
10129 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10130 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10131 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10132 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10133 if (MemVT != MVT::v16i1)
10134 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10135 DAG.getConstant(0, dl, MVT::i32));
10136 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10137}
10138
10139void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10140 SelectionDAG &DAG) const {
10141 LoadSDNode *LD = cast<LoadSDNode>(N);
10142 EVT MemVT = LD->getMemoryVT();
10143 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10144
10145 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10146 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10147 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10148 SDLoc dl(N);
10149 SDValue Result = DAG.getMemIntrinsicNode(
10150 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10151 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10152 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10153 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10154 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10155 Results.append({Pair, Result.getValue(2)});
10156 }
10157}
10158
10159static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10160 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10161 EVT MemVT = ST->getMemoryVT();
10162 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10163 MemVT == MVT::v16i1) &&
10164 "Expected a predicate type!");
10165 assert(MemVT == ST->getValue().getValueType());
10166 assert(!ST->isTruncatingStore() && "Expected a non-truncating store");
10167 assert(ST->isUnindexed() && "Expected an unindexed store");
10168
10169 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10170 // top bits unset and a scalar store.
10171 SDLoc dl(Op);
10172 SDValue Build = ST->getValue();
10173 if (MemVT != MVT::v16i1) {
10174 SmallVector<SDValue, 16> Ops;
10175 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10176 unsigned Elt = DAG.getDataLayout().isBigEndian()
10177 ? MemVT.getVectorNumElements() - I - 1
10178 : I;
10179 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10180 DAG.getConstant(Elt, dl, MVT::i32)));
10181 }
10182 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10183 Ops.push_back(DAG.getUNDEF(MVT::i32));
10184 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10185 }
10186 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
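 // Mirror the load path on big-endian targets: bit-reverse the 32-bit value
 // and shift it down so the predicate bits are stored in the expected order.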
10187 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10188 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10189 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10190 DAG.getConstant(16, dl, MVT::i32));
10191 return DAG.getTruncStore(
10192 ST->getChain(), dl, GRP, ST->getBasePtr(),
10193 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10194 ST->getMemOperand());
10195}
10196
10197static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10198 const ARMSubtarget *Subtarget) {
10199 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10200 EVT MemVT = ST->getMemoryVT();
10201 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10202
10203 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10204 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10205 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10206 SDNode *N = Op.getNode();
10207 SDLoc dl(N);
10208
10209 SDValue Lo = DAG.getNode(
10210 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10211 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10212 MVT::i32));
10213 SDValue Hi = DAG.getNode(
10214 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10215 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10216 MVT::i32));
10217
10218 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10219 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10220 MemVT, ST->getMemOperand());
10221 } else if (Subtarget->hasMVEIntegerOps() &&
10222 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10223 MemVT == MVT::v16i1))) {
10224 return LowerPredicateStore(Op, DAG);
10225 }
10226
10227 return SDValue();
10228}
10229
10230static bool isZeroVector(SDValue N) {
10231 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10232 (N->getOpcode() == ARMISD::VMOVIMM &&
10233 isNullConstant(N->getOperand(0))));
10234}
10235
10236static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10237 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10238 MVT VT = Op.getSimpleValueType();
10239 SDValue Mask = N->getMask();
10240 SDValue PassThru = N->getPassThru();
10241 SDLoc dl(Op);
10242
10243 if (isZeroVector(PassThru))
10244 return Op;
10245
10246 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10247 // zero too, and other values are lowered to a select.
10248 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10249 DAG.getTargetConstant(0, dl, MVT::i32));
10250 SDValue NewLoad = DAG.getMaskedLoad(
10251 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10252 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10253 N->getExtensionType(), N->isExpandingLoad());
10254 SDValue Combo = NewLoad;
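 // A pass-through that is just a bitcast or VECTOR_REG_CAST of a zero vector
 // also counts as zero, so no select is needed for it either.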
10255 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10256 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10257 isZeroVector(PassThru->getOperand(0));
10258 if (!PassThru.isUndef() && !PassThruIsCastZero)
10259 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10260 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10261}
10262
10263static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10264 const ARMSubtarget *ST) {
10265 if (!ST->hasMVEIntegerOps())
10266 return SDValue();
10267
10268 SDLoc dl(Op);
10269 unsigned BaseOpcode = 0;
10270 switch (Op->getOpcode()) {
10271 default: llvm_unreachable("Expected VECREDUCE opcode");
10272 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10273 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10274 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10275 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10276 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10277 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10278 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10279 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10280 }
10281
10282 SDValue Op0 = Op->getOperand(0);
10283 EVT VT = Op0.getValueType();
10284 EVT EltVT = VT.getVectorElementType();
10285 unsigned NumElts = VT.getVectorNumElements();
10286 unsigned NumActiveLanes = NumElts;
10287
10288 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10289 NumActiveLanes == 2) &&
10290 "Only expected a power-of-2 vector size");
10291
10292 // Use BaseOpcode(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10293 // allows us to easily extract vector elements from the lanes.
10294 while (NumActiveLanes > 4) {
10295 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10296 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10297 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10298 NumActiveLanes /= 2;
10299 }
10300
10301 SDValue Res;
10302 if (NumActiveLanes == 4) {
10303 // The remaining 4 elements are reduced sequentially
10304 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10305 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10306 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10307 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10308 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10309 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10310 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10311 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10312 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10313 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10314 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10315 } else {
10316 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10317 DAG.getConstant(0, dl, MVT::i32));
10318 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10319 DAG.getConstant(1, dl, MVT::i32));
10320 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10321 }
10322
10323 // Result type may be wider than element type.
10324 if (EltVT != Op->getValueType(0))
10325 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10326 return Res;
10327}
10328
10329static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10330 const ARMSubtarget *ST) {
10331 if (!ST->hasMVEFloatOps())
10332 return SDValue();
10333 return LowerVecReduce(Op, DAG, ST);
10334}
10335
10336static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10337 const ARMSubtarget *ST) {
10338 if (!ST->hasNEON())
10339 return SDValue();
10340
10341 SDLoc dl(Op);
10342 SDValue Op0 = Op->getOperand(0);
10343 EVT VT = Op0.getValueType();
10344 EVT EltVT = VT.getVectorElementType();
10345
10346 unsigned PairwiseIntrinsic = 0;
10347 switch (Op->getOpcode()) {
10348 default:
10349 llvm_unreachable("Expected VECREDUCE opcode");
10350 case ISD::VECREDUCE_UMIN:
10351 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10352 break;
10353 case ISD::VECREDUCE_UMAX:
10354 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10355 break;
10356 case ISD::VECREDUCE_SMIN:
10357 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10358 break;
10359 case ISD::VECREDUCE_SMAX:
10360 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10361 break;
10362 }
10363 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10364
10365 unsigned NumElts = VT.getVectorNumElements();
10366 unsigned NumActiveLanes = NumElts;
10367
10368 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10369 NumActiveLanes == 2) &&
10370 "Only expected a power-of-2 vector size");
10371
10372 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10373 if (VT.is128BitVector()) {
10374 SDValue Lo, Hi;
10375 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10376 VT = Lo.getValueType();
10377 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10378 NumActiveLanes /= 2;
10379 }
10380
10381 // Use pairwise reductions until one lane remains
10382 while (NumActiveLanes > 1) {
10383 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10384 NumActiveLanes /= 2;
10385 }
10386
10387 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10388 DAG.getConstant(0, dl, MVT::i32));
10389
10390 // Result type may be wider than element type.
10391 if (EltVT != Op.getValueType()) {
10392 unsigned Extend = 0;
10393 switch (Op->getOpcode()) {
10394 default:
10395 llvm_unreachable("Expected VECREDUCE opcode");
10396 case ISD::VECREDUCE_UMIN:
10397 case ISD::VECREDUCE_UMAX:
10398 Extend = ISD::ZERO_EXTEND;
10399 break;
10400 case ISD::VECREDUCE_SMIN:
10401 case ISD::VECREDUCE_SMAX:
10402 Extend = ISD::SIGN_EXTEND;
10403 break;
10404 }
10405 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10406 }
10407 return Res;
10408}
10409
10410static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10411 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10412 // Acquire/Release load/store is not legal for targets without a dmb or
10413 // equivalent available.
10414 return SDValue();
10415
10416 // Monotonic load/store is legal for all targets.
10417 return Op;
10418}
10419
10420static void ReplaceREADCYCLECOUNTER(SDNode *N,
10421 SmallVectorImpl<SDValue> &Results,
10422 SelectionDAG &DAG,
10423 const ARMSubtarget *Subtarget) {
10424 SDLoc DL(N);
10425 // Under Power Management extensions, the cycle-count is:
10426 // mrc p15, #0, <Rt>, c9, c13, #0
10427 SDValue Ops[] = { N->getOperand(0), // Chain
10428 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10429 DAG.getTargetConstant(15, DL, MVT::i32),
10430 DAG.getTargetConstant(0, DL, MVT::i32),
10431 DAG.getTargetConstant(9, DL, MVT::i32),
10432 DAG.getTargetConstant(13, DL, MVT::i32),
10433 DAG.getTargetConstant(0, DL, MVT::i32)
10434 };
10435
10436 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10437 DAG.getVTList(MVT::i32, MVT::Other), Ops);
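 // This MRC encoding reads the 32-bit cycle counter (PMCCNTR), so the i64
 // READCYCLECOUNTER result is formed by pairing it with a zero high half.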
10438 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10439 DAG.getConstant(0, DL, MVT::i32)));
10440 Results.push_back(Cycles32.getValue(1));
10441}
10442
10443static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10444 SDLoc dl(V.getNode());
10445 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10446 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10447 if (isBigEndian)
10448 std::swap (VLo, VHi);
10449 SDValue RegClass =
10450 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10451 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10452 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10453 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10454 return SDValue(
10455 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10456}
10457
10458static void ReplaceCMP_SWAP_64Results(SDNode *N,
10459 SmallVectorImpl<SDValue> &Results,
10460 SelectionDAG &DAG) {
10461 assert(N->getValueType(0) == MVT::i64 &&
10462 "AtomicCmpSwap on types less than 64 should be legal");
10463 SDValue Ops[] = {N->getOperand(1),
10464 createGPRPairNode(DAG, N->getOperand(2)),
10465 createGPRPairNode(DAG, N->getOperand(3)),
10466 N->getOperand(0)};
10467 SDNode *CmpSwap = DAG.getMachineNode(
10468 ARM::CMP_SWAP_64, SDLoc(N),
10469 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10470
10471 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10472 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10473
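 // CMP_SWAP_64 is a pseudo that is later expanded into an LDREXD/STREXD loop;
 // split its Untyped GPRPair result back into the two i32 halves here.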
10474 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10475
10476 SDValue Lo =
10477 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10478 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10479 SDValue Hi =
10480 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10481 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10482 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10483 Results.push_back(SDValue(CmpSwap, 2));
10484}
10485
10486SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10487 SDLoc dl(Op);
10488 EVT VT = Op.getValueType();
10489 SDValue Chain = Op.getOperand(0);
10490 SDValue LHS = Op.getOperand(1);
10491 SDValue RHS = Op.getOperand(2);
10492 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10493 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10494
10495 // If we don't have instructions of this float type then soften to a libcall
10496 // and use SETCC instead.
10497 if (isUnsupportedFloatingType(LHS.getValueType())) {
10498 softenSetCCOperands(
10499 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10500 if (!RHS.getNode()) {
10501 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10502 CC = ISD::SETNE;
10503 }
10504 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10505 DAG.getCondCode(CC));
10506 return DAG.getMergeValues({Result, Chain}, dl);
10507 }
10508
10509 ARMCC::CondCodes CondCode, CondCode2;
10510 FPCCToARMCC(CC, CondCode, CondCode2);
10511
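 // The compare is lowered to a VFP compare plus a conditional move of 0/1.
 // Some FP conditions map to two ARM condition codes (CondCode2 below), in
 // which case a second compare and CMOV are emitted.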
10512 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10513 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10514 // instructions using a chain instead of glue. This would also fix the problem
10515 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10516 // CondCode2 != AL.
10517 SDValue True = DAG.getConstant(1, dl, VT);
10518 SDValue False = DAG.getConstant(0, dl, VT);
10519 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10520 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10521 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10522 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10523 if (CondCode2 != ARMCC::AL) {
10524 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10525 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10526 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10527 }
10528 return DAG.getMergeValues({Result, Chain}, dl);
10529}
10530
10531SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10532 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10533
10534 EVT VT = getPointerTy(DAG.getDataLayout());
10535 SDLoc DL(Op);
10536 int FI = MFI.CreateFixedObject(4, 0, false);
10537 return DAG.getFrameIndex(FI, VT);
10538}
10539
10540SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10541 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10542 switch (Op.getOpcode()) {
10543 default: llvm_unreachable("Don't know how to custom lower this!");
10544 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10545 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10546 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10547 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10548 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10549 case ISD::SELECT: return LowerSELECT(Op, DAG);
10550 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10551 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10552 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10553 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10554 case ISD::VASTART: return LowerVASTART(Op, DAG);
10555 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10556 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10557 case ISD::SINT_TO_FP:
10558 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10559 case ISD::STRICT_FP_TO_SINT:
10560 case ISD::STRICT_FP_TO_UINT:
10561 case ISD::FP_TO_SINT:
10562 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10563 case ISD::FP_TO_SINT_SAT:
10564 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10565 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10566 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10567 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10568 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10569 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10570 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10571 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10572 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10573 Subtarget);
10574 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10575 case ISD::SHL:
10576 case ISD::SRL:
10577 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10578 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10579 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10580 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10581 case ISD::SRL_PARTS:
10582 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10583 case ISD::CTTZ:
10584 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10585 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10586 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10587 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10588 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10589 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10590 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10591 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10592 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10593 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10594 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10595 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10596 case ISD::SIGN_EXTEND:
10597 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10598 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10599 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10600 case ISD::SET_FPMODE:
10601 return LowerSET_FPMODE(Op, DAG);
10602 case ISD::RESET_FPMODE:
10603 return LowerRESET_FPMODE(Op, DAG);
10604 case ISD::MUL: return LowerMUL(Op, DAG);
10605 case ISD::SDIV:
10606 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10607 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10608 return LowerSDIV(Op, DAG, Subtarget);
10609 case ISD::UDIV:
10610 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10611 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10612 return LowerUDIV(Op, DAG, Subtarget);
10613 case ISD::UADDO_CARRY:
10614 case ISD::USUBO_CARRY:
10615 return LowerUADDSUBO_CARRY(Op, DAG);
10616 case ISD::SADDO:
10617 case ISD::SSUBO:
10618 return LowerSignedALUO(Op, DAG);
10619 case ISD::UADDO:
10620 case ISD::USUBO:
10621 return LowerUnsignedALUO(Op, DAG);
10622 case ISD::SADDSAT:
10623 case ISD::SSUBSAT:
10624 case ISD::UADDSAT:
10625 case ISD::USUBSAT:
10626 return LowerADDSUBSAT(Op, DAG, Subtarget);
10627 case ISD::LOAD:
10628 return LowerPredicateLoad(Op, DAG);
10629 case ISD::STORE:
10630 return LowerSTORE(Op, DAG, Subtarget);
10631 case ISD::MLOAD:
10632 return LowerMLOAD(Op, DAG);
10633 case ISD::VECREDUCE_MUL:
10634 case ISD::VECREDUCE_AND:
10635 case ISD::VECREDUCE_OR:
10636 case ISD::VECREDUCE_XOR:
10637 return LowerVecReduce(Op, DAG, Subtarget);
10638 case ISD::VECREDUCE_FADD:
10639 case ISD::VECREDUCE_FMUL:
10640 case ISD::VECREDUCE_FMIN:
10641 case ISD::VECREDUCE_FMAX:
10642 return LowerVecReduceF(Op, DAG, Subtarget);
10643 case ISD::VECREDUCE_UMIN:
10644 case ISD::VECREDUCE_UMAX:
10645 case ISD::VECREDUCE_SMIN:
10646 case ISD::VECREDUCE_SMAX:
10647 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10648 case ISD::ATOMIC_LOAD:
10649 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10650 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10651 case ISD::SDIVREM:
10652 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10653 case ISD::DYNAMIC_STACKALLOC:
10654 if (Subtarget->isTargetWindows())
10655 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10656 llvm_unreachable("Don't know how to custom lower this!");
10657 case ISD::STRICT_FP_ROUND:
10658 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10659 case ISD::STRICT_FP_EXTEND:
10660 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10661 case ISD::STRICT_FSETCC:
10662 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10663 case ISD::SPONENTRY:
10664 return LowerSPONENTRY(Op, DAG);
10665 case ARMISD::WIN__DBZCHK: return SDValue();
10666 }
10667}
10668
10669static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10670 SelectionDAG &DAG) {
10671 unsigned IntNo = N->getConstantOperandVal(0);
10672 unsigned Opc = 0;
10673 if (IntNo == Intrinsic::arm_smlald)
10674 Opc = ARMISD::SMLALD;
10675 else if (IntNo == Intrinsic::arm_smlaldx)
10676 Opc = ARMISD::SMLALDX;
10677 else if (IntNo == Intrinsic::arm_smlsld)
10678 Opc = ARMISD::SMLSLD;
10679 else if (IntNo == Intrinsic::arm_smlsldx)
10680 Opc = ARMISD::SMLSLDX;
10681 else
10682 return;
10683
10684 SDLoc dl(N);
10685 SDValue Lo, Hi;
10686 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10687
10688 SDValue LongMul = DAG.getNode(Opc, dl,
10689 DAG.getVTList(MVT::i32, MVT::i32),
10690 N->getOperand(1), N->getOperand(2),
10691 Lo, Hi);
10692 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10693 LongMul.getValue(0), LongMul.getValue(1)));
10694}
10695
10696/// ReplaceNodeResults - Replace the results of node with an illegal result
10697/// type with new values built out of custom code.
10698void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10699 SmallVectorImpl<SDValue> &Results,
10700 SelectionDAG &DAG) const {
10701 SDValue Res;
10702 switch (N->getOpcode()) {
10703 default:
10704 llvm_unreachable("Don't know how to custom expand this!");
10705 case ISD::READ_REGISTER:
10706 ExpandREAD_REGISTER(N, Results, DAG);
10707 break;
10708 case ISD::BITCAST:
10709 Res = ExpandBITCAST(N, DAG, Subtarget);
10710 break;
10711 case ISD::SRL:
10712 case ISD::SRA:
10713 case ISD::SHL:
10714 Res = Expand64BitShift(N, DAG, Subtarget);
10715 break;
10716 case ISD::SREM:
10717 case ISD::UREM:
10718 Res = LowerREM(N, DAG);
10719 break;
10720 case ISD::SDIVREM:
10721 case ISD::UDIVREM:
10722 Res = LowerDivRem(SDValue(N, 0), DAG);
10723 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10724 Results.push_back(Res.getValue(0));
10725 Results.push_back(Res.getValue(1));
10726 return;
10727 case ISD::SADDSAT:
10728 case ISD::SSUBSAT:
10729 case ISD::UADDSAT:
10730 case ISD::USUBSAT:
10731 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10732 break;
10733 case ISD::READCYCLECOUNTER:
10734 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10735 return;
10736 case ISD::UDIV:
10737 case ISD::SDIV:
10738 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10739 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10740 Results);
10741 case ISD::ATOMIC_CMP_SWAP:
10742 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10743 return;
10744 case ISD::INTRINSIC_WO_CHAIN:
10745 return ReplaceLongIntrinsic(N, Results, DAG);
10746 case ISD::LOAD:
10747 LowerLOAD(N, Results, DAG);
10748 break;
10749 case ISD::TRUNCATE:
10750 Res = LowerTruncate(N, DAG, Subtarget);
10751 break;
10752 case ISD::SIGN_EXTEND:
10753 case ISD::ZERO_EXTEND:
10754 Res = LowerVectorExtend(N, DAG, Subtarget);
10755 break;
10756 case ISD::FP_TO_SINT_SAT:
10757 case ISD::FP_TO_UINT_SAT:
10758 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10759 break;
10760 }
10761 if (Res.getNode())
10762 Results.push_back(Res);
10763}
10764
10765//===----------------------------------------------------------------------===//
10766// ARM Scheduler Hooks
10767//===----------------------------------------------------------------------===//
10768
10769/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10770/// registers the function context.
10771void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10772 MachineBasicBlock *MBB,
10773 MachineBasicBlock *DispatchBB,
10774 int FI) const {
10775 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10776 "ROPI/RWPI not currently supported with SjLj");
10777 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10778 DebugLoc dl = MI.getDebugLoc();
10779 MachineFunction *MF = MBB->getParent();
10780 MachineRegisterInfo *MRI = &MF->getRegInfo();
10781 MachineConstantPool *MCP = MF->getConstantPool();
10782 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10783 const Function &F = MF->getFunction();
10784
10785 bool isThumb = Subtarget->isThumb();
10786 bool isThumb2 = Subtarget->isThumb2();
10787
10788 unsigned PCLabelId = AFI->createPICLabelUId();
10789 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10790 ARMConstantPoolValue *CPV =
10791 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10792 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10793
10794 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10795 : &ARM::GPRRegClass;
10796
10797 // Grab constant pool and fixed stack memory operands.
10798 MachineMemOperand *CPMMO =
10799 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10800 MachineMemOperand::MOLoad, 4, Align(4));
10801
10802 MachineMemOperand *FIMMOSt =
10803 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10804 MachineMemOperand::MOStore, 4, Align(4));
10805
10806 // Load the address of the dispatch MBB into the jump buffer.
10807 if (isThumb2) {
10808 // Incoming value: jbuf
10809 // ldr.n r5, LCPI1_1
10810 // orr r5, r5, #1
10811 // add r5, pc
10812 // str r5, [$jbuf, #+4] ; &jbuf[1]
10813 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10814 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10815 .addConstantPoolIndex(CPI)
10816 .addMemOperand(CPMMO)
10817 .add(predOps(ARMCC::AL));
10818 // Set the low bit because of thumb mode.
10819 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10820 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10821 .addReg(NewVReg1, RegState::Kill)
10822 .addImm(0x01)
10823 .add(predOps(ARMCC::AL))
10824 .add(condCodeOp());
10825 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10826 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10827 .addReg(NewVReg2, RegState::Kill)
10828 .addImm(PCLabelId);
10829 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10830 .addReg(NewVReg3, RegState::Kill)
10831 .addFrameIndex(FI)
10832 .addImm(36) // &jbuf[1] :: pc
10833 .addMemOperand(FIMMOSt)
10834 .add(predOps(ARMCC::AL));
10835 } else if (isThumb) {
10836 // Incoming value: jbuf
10837 // ldr.n r1, LCPI1_4
10838 // add r1, pc
10839 // mov r2, #1
10840 // orrs r1, r2
10841 // add r2, $jbuf, #+4 ; &jbuf[1]
10842 // str r1, [r2]
10843 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10844 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10845 .addConstantPoolIndex(CPI)
10846 .addMemOperand(CPMMO)
10847 .add(predOps(ARMCC::AL));
10848 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10849 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10850 .addReg(NewVReg1, RegState::Kill)
10851 .addImm(PCLabelId);
10852 // Set the low bit because of thumb mode.
10853 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10854 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10855 .addReg(ARM::CPSR, RegState::Define)
10856 .addImm(1)
10857 .add(predOps(ARMCC::AL));
10858 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10859 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10860 .addReg(ARM::CPSR, RegState::Define)
10861 .addReg(NewVReg2, RegState::Kill)
10862 .addReg(NewVReg3, RegState::Kill)
10863 .add(predOps(ARMCC::AL));
10864 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10865 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10866 .addFrameIndex(FI)
10867 .addImm(36); // &jbuf[1] :: pc
10868 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10869 .addReg(NewVReg4, RegState::Kill)
10870 .addReg(NewVReg5, RegState::Kill)
10871 .addImm(0)
10872 .addMemOperand(FIMMOSt)
10873 .add(predOps(ARMCC::AL));
10874 } else {
10875 // Incoming value: jbuf
10876 // ldr r1, LCPI1_1
10877 // add r1, pc, r1
10878 // str r1, [$jbuf, #+4] ; &jbuf[1]
10879 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10880 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10881 .addConstantPoolIndex(CPI)
10882 .addImm(0)
10883 .addMemOperand(CPMMO)
10884 .add(predOps(ARMCC::AL));
10885 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10886 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10887 .addReg(NewVReg1, RegState::Kill)
10888 .addImm(PCLabelId)
10889 .add(predOps(ARMCC::AL));
10890 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10891 .addReg(NewVReg2, RegState::Kill)
10892 .addFrameIndex(FI)
10893 .addImm(36) // &jbuf[1] :: pc
10894 .addMemOperand(FIMMOSt)
10895 .add(predOps(ARMCC::AL));
10896 }
10897}
10898
10899void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10900 MachineBasicBlock *MBB) const {
10901 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10902 DebugLoc dl = MI.getDebugLoc();
10903 MachineFunction *MF = MBB->getParent();
10904 MachineRegisterInfo *MRI = &MF->getRegInfo();
10905 MachineFrameInfo &MFI = MF->getFrameInfo();
10906 int FI = MFI.getFunctionContextIndex();
10907
10908 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10909 : &ARM::GPRnopcRegClass;
10910
10911 // Get a mapping of the call site numbers to all of the landing pads they're
10912 // associated with.
10913 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10914 unsigned MaxCSNum = 0;
10915 for (MachineBasicBlock &BB : *MF) {
10916 if (!BB.isEHPad())
10917 continue;
10918
10919 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10920 // pad.
10921 for (MachineInstr &II : BB) {
10922 if (!II.isEHLabel())
10923 continue;
10924
10925 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10926 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10927
10928 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10929 for (unsigned Idx : CallSiteIdxs) {
10930 CallSiteNumToLPad[Idx].push_back(&BB);
10931 MaxCSNum = std::max(MaxCSNum, Idx);
10932 }
10933 break;
10934 }
10935 }
10936
10937 // Get an ordered list of the machine basic blocks for the jump table.
10938 std::vector<MachineBasicBlock*> LPadList;
10939 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10940 LPadList.reserve(CallSiteNumToLPad.size());
10941 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10942 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10943 for (MachineBasicBlock *MBB : MBBList) {
10944 LPadList.push_back(MBB);
10945 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10946 }
10947 }
10948
10949 assert(!LPadList.empty() &&
10950 "No landing pad destinations for the dispatch jump table!");
10951
10952 // Create the jump table and associated information.
10953 MachineJumpTableInfo *JTI =
10954 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10955 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10956
10957 // Create the MBBs for the dispatch code.
10958
10959 // Shove the dispatch's address into the return slot in the function context.
10960 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10961 DispatchBB->setIsEHPad();
10962
10963 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10964 unsigned trap_opcode;
10965 if (Subtarget->isThumb())
10966 trap_opcode = ARM::tTRAP;
10967 else
10968 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10969
10970 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10971 DispatchBB->addSuccessor(TrapBB);
10972
10973 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10974 DispatchBB->addSuccessor(DispContBB);
10975
10976 // Insert the new MBBs into the function.
10977 MF->insert(MF->end(), DispatchBB);
10978 MF->insert(MF->end(), DispContBB);
10979 MF->insert(MF->end(), TrapBB);
10980
10981 // Insert code into the entry block that creates and registers the function
10982 // context.
10983 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10984
10985 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10986 MachinePointerInfo::getFixedStack(*MF, FI),
10987 MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, Align(4));
10988
10989 MachineInstrBuilder MIB;
10990 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10991
10992 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10993 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10994
10995 // Add a register mask with no preserved registers. This results in all
10996 // registers being marked as clobbered. This can't work if the dispatch block
10997 // is in a Thumb1 function and is linked with ARM code which uses the FP
10998 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
10999 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
11000
11001 bool IsPositionIndependent = isPositionIndependent();
11002 unsigned NumLPads = LPadList.size();
11003 if (Subtarget->isThumb2()) {
11004 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11005 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11006 .addFrameIndex(FI)
11007 .addImm(4)
11008 .addMemOperand(FIMMOLd)
11009 .add(predOps(ARMCC::AL));
11010
11011 if (NumLPads < 256) {
11012 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11013 .addReg(NewVReg1)
11014 .addImm(LPadList.size())
11015 .add(predOps(ARMCC::AL));
11016 } else {
11017 Register VReg1 = MRI->createVirtualRegister(TRC);
11018 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11019 .addImm(NumLPads & 0xFFFF)
11020 .add(predOps(ARMCC::AL));
11021
11022 unsigned VReg2 = VReg1;
11023 if ((NumLPads & 0xFFFF0000) != 0) {
11024 VReg2 = MRI->createVirtualRegister(TRC);
11025 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11026 .addReg(VReg1)
11027 .addImm(NumLPads >> 16)
11028 .add(predOps(ARMCC::AL));
11029 }
11030
11031 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11032 .addReg(NewVReg1)
11033 .addReg(VReg2)
11034 .add(predOps(ARMCC::AL));
11035 }
11036
11037 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11038 .addMBB(TrapBB)
11039 .addImm(ARMCC::HI)
11040 .addReg(ARM::CPSR);
11041
11042 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11043 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11044 .addJumpTableIndex(MJTI)
11045 .add(predOps(ARMCC::AL));
11046
11047 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11048 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11049 .addReg(NewVReg3, RegState::Kill)
11050 .addReg(NewVReg1)
11051 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
11052 .add(predOps(ARMCC::AL))
11053 .add(condCodeOp());
11054
11055 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11056 .addReg(NewVReg4, RegState::Kill)
11057 .addReg(NewVReg1)
11058 .addJumpTableIndex(MJTI);
11059 } else if (Subtarget->isThumb()) {
11060 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11061 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11062 .addFrameIndex(FI)
11063 .addImm(1)
11064 .addMemOperand(FIMMOLd)
11065 .add(predOps(ARMCC::AL));
11066
11067 if (NumLPads < 256) {
11068 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11069 .addReg(NewVReg1)
11070 .addImm(NumLPads)
11071 .add(predOps(ARMCC::AL));
11072 } else {
11073 MachineConstantPool *ConstantPool = MF->getConstantPool();
11074 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11075 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11076
11077 // MachineConstantPool wants an explicit alignment.
11078 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11079 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11080
11081 Register VReg1 = MRI->createVirtualRegister(TRC);
11082 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11083 .addReg(VReg1, RegState::Define)
11084 .addConstantPoolIndex(Idx)
11085 .add(predOps(ARMCC::AL));
11086 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11087 .addReg(NewVReg1)
11088 .addReg(VReg1)
11089 .add(predOps(ARMCC::AL));
11090 }
11091
11092 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11093 .addMBB(TrapBB)
11094 .addImm(ARMCC::HI)
11095 .addReg(ARM::CPSR);
11096
11097 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11098 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11099 .addReg(ARM::CPSR, RegState::Define)
11100 .addReg(NewVReg1)
11101 .addImm(2)
11102 .add(predOps(ARMCC::AL));
11103
11104 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11105 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11106 .addJumpTableIndex(MJTI)
11107 .add(predOps(ARMCC::AL));
11108
11109 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11110 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11111 .addReg(ARM::CPSR, RegState::Define)
11112 .addReg(NewVReg2, RegState::Kill)
11113 .addReg(NewVReg3)
11114 .add(predOps(ARMCC::AL));
11115
11116 MachineMemOperand *JTMMOLd =
11117 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11118 MachineMemOperand::MOLoad, 4, Align(4));
11119
11120 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11121 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11122 .addReg(NewVReg4, RegState::Kill)
11123 .addImm(0)
11124 .addMemOperand(JTMMOLd)
11125 .add(predOps(ARMCC::AL));
11126
11127 unsigned NewVReg6 = NewVReg5;
11128 if (IsPositionIndependent) {
11129 NewVReg6 = MRI->createVirtualRegister(TRC);
11130 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11131 .addReg(ARM::CPSR, RegState::Define)
11132 .addReg(NewVReg5, RegState::Kill)
11133 .addReg(NewVReg3)
11134 .add(predOps(ARMCC::AL));
11135 }
11136
11137 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11138 .addReg(NewVReg6, RegState::Kill)
11139 .addJumpTableIndex(MJTI);
11140 } else {
11141 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11142 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11143 .addFrameIndex(FI)
11144 .addImm(4)
11145 .addMemOperand(FIMMOLd)
11146 .add(predOps(ARMCC::AL));
11147
11148 if (NumLPads < 256) {
11149 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11150 .addReg(NewVReg1)
11151 .addImm(NumLPads)
11152 .add(predOps(ARMCC::AL));
11153 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11154 Register VReg1 = MRI->createVirtualRegister(TRC);
11155 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11156 .addImm(NumLPads & 0xFFFF)
11157 .add(predOps(ARMCC::AL));
11158
11159 unsigned VReg2 = VReg1;
11160 if ((NumLPads & 0xFFFF0000) != 0) {
11161 VReg2 = MRI->createVirtualRegister(TRC);
11162 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11163 .addReg(VReg1)
11164 .addImm(NumLPads >> 16)
11165 .add(predOps(ARMCC::AL));
11166 }
11167
11168 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11169 .addReg(NewVReg1)
11170 .addReg(VReg2)
11171 .add(predOps(ARMCC::AL));
11172 } else {
11173 MachineConstantPool *ConstantPool = MF->getConstantPool();
11174 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11175 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11176
11177 // MachineConstantPool wants an explicit alignment.
11178 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11179 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11180
11181 Register VReg1 = MRI->createVirtualRegister(TRC);
11182 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11183 .addReg(VReg1, RegState::Define)
11184 .addConstantPoolIndex(Idx)
11185 .addImm(0)
11186 .add(predOps(ARMCC::AL));
11187 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11188 .addReg(NewVReg1)
11189 .addReg(VReg1, RegState::Kill)
11190 .add(predOps(ARMCC::AL));
11191 }
11192
11193 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11194 .addMBB(TrapBB)
11195 .addImm(ARMCC::HI)
11196 .addReg(ARM::CPSR);
11197
11198 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11199 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11200 .addReg(NewVReg1)
11201 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
11202 .add(predOps(ARMCC::AL))
11203 .add(condCodeOp());
11204 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11205 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11206 .addJumpTableIndex(MJTI)
11207 .add(predOps(ARMCC::AL));
11208
11209 MachineMemOperand *JTMMOLd =
11210 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11211 MachineMemOperand::MOLoad, 4, Align(4));
11212 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11213 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11214 .addReg(NewVReg3, RegState::Kill)
11215 .addReg(NewVReg4)
11216 .addImm(0)
11217 .addMemOperand(JTMMOLd)
11218 .add(predOps(ARMCC::AL));
11219
11220 if (IsPositionIndependent) {
11221 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11222 .addReg(NewVReg5, RegState::Kill)
11223 .addReg(NewVReg4)
11224 .addJumpTableIndex(MJTI);
11225 } else {
11226 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11227 .addReg(NewVReg5, RegState::Kill)
11228 .addJumpTableIndex(MJTI);
11229 }
11230 }
11231
11232 // Add the jump table entries as successors to the MBB.
11233 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11234 for (MachineBasicBlock *CurMBB : LPadList) {
11235 if (SeenMBBs.insert(CurMBB).second)
11236 DispContBB->addSuccessor(CurMBB);
11237 }
11238
11239 // N.B. the order the invoke BBs are processed in doesn't matter here.
11240 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11241 SmallVector<MachineBasicBlock*, 64> MBBLPads;
11242 for (MachineBasicBlock *BB : InvokeBBs) {
11243
11244 // Remove the landing pad successor from the invoke block and replace it
11245 // with the new dispatch block.
11246 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11247 while (!Successors.empty()) {
11248 MachineBasicBlock *SMBB = Successors.pop_back_val();
11249 if (SMBB->isEHPad()) {
11250 BB->removeSuccessor(SMBB);
11251 MBBLPads.push_back(SMBB);
11252 }
11253 }
11254
11255 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11256 BB->normalizeSuccProbs();
11257
11258 // Find the invoke call and mark all of the callee-saved registers as
11259 // 'implicit defined' so that they're spilled. This prevents code from
11260 // moving instructions to before the EH block, where they will never be
11261 // executed.
11262 for (MachineBasicBlock::reverse_iterator
11263 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11264 if (!II->isCall()) continue;
11265
11266 DenseMap<unsigned, bool> DefRegs;
11267 for (MachineInstr::mop_iterator
11268 OI = II->operands_begin(), OE = II->operands_end();
11269 OI != OE; ++OI) {
11270 if (!OI->isReg()) continue;
11271 DefRegs[OI->getReg()] = true;
11272 }
11273
11274 MachineInstrBuilder MIB(*MF, &*II);
11275
11276 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11277 unsigned Reg = SavedRegs[i];
11278 if (Subtarget->isThumb2() &&
11279 !ARM::tGPRRegClass.contains(Reg) &&
11280 !ARM::hGPRRegClass.contains(Reg))
11281 continue;
11282 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11283 continue;
11284 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11285 continue;
11286 if (!DefRegs[Reg])
11287 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11288 }
11289
11290 break;
11291 }
11292 }
11293
11294 // Mark all former landing pads as non-landing pads. The dispatch is the only
11295 // landing pad now.
11296 for (MachineBasicBlock *MBBLPad : MBBLPads)
11297 MBBLPad->setIsEHPad(false);
11298
11299 // The instruction is gone now.
11300 MI.eraseFromParent();
11301}
11302
11303static
11304MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11305 for (MachineBasicBlock *S : MBB->successors())
11306 if (S != Succ)
11307 return S;
11308 llvm_unreachable("Expecting a BB with two successors!");
11309}
11310
11311/// Return the load opcode for a given load size. If the load size is >= 8, a
11312/// NEON opcode will be returned.
11313static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11314 if (LdSize >= 8)
11315 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11316 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11317 if (IsThumb1)
11318 return LdSize == 4 ? ARM::tLDRi
11319 : LdSize == 2 ? ARM::tLDRHi
11320 : LdSize == 1 ? ARM::tLDRBi : 0;
11321 if (IsThumb2)
11322 return LdSize == 4 ? ARM::t2LDR_POST
11323 : LdSize == 2 ? ARM::t2LDRH_POST
11324 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11325 return LdSize == 4 ? ARM::LDR_POST_IMM
11326 : LdSize == 2 ? ARM::LDRH_POST
11327 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11328}
11329
11330/// Return the store opcode for a given store size. If the store size is >= 8, a
11331/// NEON opcode will be returned.
11332static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11333 if (StSize >= 8)
11334 return StSize == 16 ? ARM::VST1q32wb_fixed
11335 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11336 if (IsThumb1)
11337 return StSize == 4 ? ARM::tSTRi
11338 : StSize == 2 ? ARM::tSTRHi
11339 : StSize == 1 ? ARM::tSTRBi : 0;
11340 if (IsThumb2)
11341 return StSize == 4 ? ARM::t2STR_POST
11342 : StSize == 2 ? ARM::t2STRH_POST
11343 : StSize == 1 ? ARM::t2STRB_POST : 0;
11344 return StSize == 4 ? ARM::STR_POST_IMM
11345 : StSize == 2 ? ARM::STRH_POST
11346 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11347}
11348
11349/// Emit a post-increment load operation with given size. The instructions
11350/// will be added to BB at Pos.
11351static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11352 const TargetInstrInfo *TII, const DebugLoc &dl,
11353 unsigned LdSize, unsigned Data, unsigned AddrIn,
11354 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11355 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11356 assert(LdOpc != 0 && "Should have a load opcode");
11357 if (LdSize >= 8) {
11358 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11359 .addReg(AddrOut, RegState::Define)
11360 .addReg(AddrIn)
11361 .addImm(0)
11362 .add(predOps(ARMCC::AL));
11363 } else if (IsThumb1) {
11364 // load + update AddrIn
11365 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11366 .addReg(AddrIn)
11367 .addImm(0)
11368 .add(predOps(ARMCC::AL));
11369 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11370 .add(t1CondCodeOp())
11371 .addReg(AddrIn)
11372 .addImm(LdSize)
11373 .add(predOps(ARMCC::AL));
11374 } else if (IsThumb2) {
11375 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11376 .addReg(AddrOut, RegState::Define)
11377 .addReg(AddrIn)
11378 .addImm(LdSize)
11379 .add(predOps(ARMCC::AL));
11380 } else { // arm
11381 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11382 .addReg(AddrOut, RegState::Define)
11383 .addReg(AddrIn)
11384 .addReg(0)
11385 .addImm(LdSize)
11386 .add(predOps(ARMCC::AL));
11387 }
11388}
11389
11390/// Emit a post-increment store operation with given size. The instructions
11391/// will be added to BB at Pos.
11392static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11393 const TargetInstrInfo *TII, const DebugLoc &dl,
11394 unsigned StSize, unsigned Data, unsigned AddrIn,
11395 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11396 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11397 assert(StOpc != 0 && "Should have a store opcode");
11398 if (StSize >= 8) {
11399 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11400 .addReg(AddrIn)
11401 .addImm(0)
11402 .addReg(Data)
11403 .add(predOps(ARMCC::AL));
11404 } else if (IsThumb1) {
11405 // store + update AddrIn
11406 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11407 .addReg(Data)
11408 .addReg(AddrIn)
11409 .addImm(0)
11410 .add(predOps(ARMCC::AL));
11411 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11412 .add(t1CondCodeOp())
11413 .addReg(AddrIn)
11414 .addImm(StSize)
11415 .add(predOps(ARMCC::AL));
11416 } else if (IsThumb2) {
11417 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11418 .addReg(Data)
11419 .addReg(AddrIn)
11420 .addImm(StSize)
11421 .add(predOps(ARMCC::AL));
11422 } else { // arm
11423 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11424 .addReg(Data)
11425 .addReg(AddrIn)
11426 .addReg(0)
11427 .addImm(StSize)
11428 .add(predOps(ARMCC::AL));
11429 }
11430}
11431
11433ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11434 MachineBasicBlock *BB) const {
11435 // This pseudo instruction has 3 operands: dst, src, size
11436 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11437 // Otherwise, we will generate unrolled scalar copies.
11438 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11439 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11440 MachineFunction::iterator It = ++BB->getIterator();
11441
11442 Register dest = MI.getOperand(0).getReg();
11443 Register src = MI.getOperand(1).getReg();
11444 unsigned SizeVal = MI.getOperand(2).getImm();
11445 unsigned Alignment = MI.getOperand(3).getImm();
11446 DebugLoc dl = MI.getDebugLoc();
11447
11448 MachineFunction *MF = BB->getParent();
11449 MachineRegisterInfo &MRI = MF->getRegInfo();
11450 unsigned UnitSize = 0;
11451 const TargetRegisterClass *TRC = nullptr;
11452 const TargetRegisterClass *VecTRC = nullptr;
11453
11454 bool IsThumb1 = Subtarget->isThumb1Only();
11455 bool IsThumb2 = Subtarget->isThumb2();
11456 bool IsThumb = Subtarget->isThumb();
11457
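 // Pick the widest copy unit the alignment allows: 16- or 8-byte NEON copies
 // when suitably aligned (and NEON is usable), otherwise word copies, falling
 // back to halfword or byte copies for smaller alignments.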
11458 if (Alignment & 1) {
11459 UnitSize = 1;
11460 } else if (Alignment & 2) {
11461 UnitSize = 2;
11462 } else {
11463 // Check whether we can use NEON instructions.
11464 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11465 Subtarget->hasNEON()) {
11466 if ((Alignment % 16 == 0) && SizeVal >= 16)
11467 UnitSize = 16;
11468 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11469 UnitSize = 8;
11470 }
11471 // Can't use NEON instructions.
11472 if (UnitSize == 0)
11473 UnitSize = 4;
11474 }
11475
11476 // Select the correct opcode and register class for unit size load/store
11477 bool IsNeon = UnitSize >= 8;
11478 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11479 if (IsNeon)
11480 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11481 : UnitSize == 8 ? &ARM::DPRRegClass
11482 : nullptr;
11483
11484 unsigned BytesLeft = SizeVal % UnitSize;
11485 unsigned LoopSize = SizeVal - BytesLeft;
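// Illustrative example (editorial, not in the upstream source): for SizeVal = 10
// with 4-byte alignment, UnitSize = 4, so LoopSize = 8 (two word-sized
// post-increment copies below) and BytesLeft = 2 (two trailing byte copies).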
11486
11487 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11488 // Use LDR and STR to copy.
11489 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11490 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11491 unsigned srcIn = src;
11492 unsigned destIn = dest;
11493 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11494 Register srcOut = MRI.createVirtualRegister(TRC);
11495 Register destOut = MRI.createVirtualRegister(TRC);
11496 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11497 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11498 IsThumb1, IsThumb2);
11499 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11500 IsThumb1, IsThumb2);
11501 srcIn = srcOut;
11502 destIn = destOut;
11503 }
11504
11505 // Handle the leftover bytes with LDRB and STRB.
11506 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11507 // [destOut] = STRB_POST(scratch, destIn, 1)
11508 for (unsigned i = 0; i < BytesLeft; i++) {
11509 Register srcOut = MRI.createVirtualRegister(TRC);
11510 Register destOut = MRI.createVirtualRegister(TRC);
11511 Register scratch = MRI.createVirtualRegister(TRC);
11512 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11513 IsThumb1, IsThumb2);
11514 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11515 IsThumb1, IsThumb2);
11516 srcIn = srcOut;
11517 destIn = destOut;
11518 }
11519 MI.eraseFromParent(); // The instruction is gone now.
11520 return BB;
11521 }
11522
11523 // Expand the pseudo op to a loop.
11524 // thisMBB:
11525 // ...
11526 // movw varEnd, # --> with thumb2
11527 // movt varEnd, #
11528 // ldrcp varEnd, idx --> without thumb2
11529 // fallthrough --> loopMBB
11530 // loopMBB:
11531 // PHI varPhi, varEnd, varLoop
11532 // PHI srcPhi, src, srcLoop
11533 // PHI destPhi, dst, destLoop
11534 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11535 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11536 // subs varLoop, varPhi, #UnitSize
11537 // bne loopMBB
11538 // fallthrough --> exitMBB
11539 // exitMBB:
11540 // epilogue to handle left-over bytes
11541 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11542 // [destOut] = STRB_POST(scratch, destLoop, 1)
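// Illustrative example (editorial): copying 70 bytes with NEON available and
// 16-byte alignment gives UnitSize = 16 and varEnd = LoopSize = 64, so the loop
// above runs four times and the epilogue copies the remaining 6 bytes with
// LDRB_POST/STRB_POST.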
11543 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11544 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11545 MF->insert(It, loopMBB);
11546 MF->insert(It, exitMBB);
11547
11548 // Set the call frame size on entry to the new basic blocks.
11549 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11550 loopMBB->setCallFrameSize(CallFrameSize);
11551 exitMBB->setCallFrameSize(CallFrameSize);
11552
11553 // Transfer the remainder of BB and its successor edges to exitMBB.
11554 exitMBB->splice(exitMBB->begin(), BB,
11555 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11556 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11557
11558 // Load an immediate to varEnd.
11559 Register varEnd = MRI.createVirtualRegister(TRC);
11560 if (Subtarget->useMovt()) {
11561 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11562 varEnd)
11563 .addImm(LoopSize);
11564 } else if (Subtarget->genExecuteOnly()) {
11565 assert(IsThumb && "Non-thumb expected to have used movt");
11566 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11567 } else {
11568 MachineConstantPool *ConstantPool = MF->getConstantPool();
11569 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11570 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11571
11572 // MachineConstantPool wants an explicit alignment.
11573 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11574 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11575 MachineMemOperand *CPMMO =
11578
11579 if (IsThumb)
11580 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11581 .addReg(varEnd, RegState::Define)
11584 .addMemOperand(CPMMO);
11585 else
11586 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11587 .addReg(varEnd, RegState::Define)
11589 .addImm(0)
11591 .addMemOperand(CPMMO);
11592 }
11593 BB->addSuccessor(loopMBB);
11594
11595 // Generate the loop body:
11596 // varPhi = PHI(varLoop, varEnd)
11597 // srcPhi = PHI(srcLoop, src)
11598 // destPhi = PHI(destLoop, dst)
11599 MachineBasicBlock *entryBB = BB;
11600 BB = loopMBB;
11601 Register varLoop = MRI.createVirtualRegister(TRC);
11602 Register varPhi = MRI.createVirtualRegister(TRC);
11603 Register srcLoop = MRI.createVirtualRegister(TRC);
11604 Register srcPhi = MRI.createVirtualRegister(TRC);
11605 Register destLoop = MRI.createVirtualRegister(TRC);
11606 Register destPhi = MRI.createVirtualRegister(TRC);
11607
11608 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11609 .addReg(varLoop).addMBB(loopMBB)
11610 .addReg(varEnd).addMBB(entryBB);
11611 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11612 .addReg(srcLoop).addMBB(loopMBB)
11613 .addReg(src).addMBB(entryBB);
11614 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11615 .addReg(destLoop).addMBB(loopMBB)
11616 .addReg(dest).addMBB(entryBB);
11617
11618 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11619 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11620 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11621 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11622 IsThumb1, IsThumb2);
11623 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11624 IsThumb1, IsThumb2);
11625
11626 // Decrement loop variable by UnitSize.
11627 if (IsThumb1) {
11628 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11629 .add(t1CondCodeOp())
11630 .addReg(varPhi)
11631 .addImm(UnitSize)
11633 } else {
11635 BuildMI(*BB, BB->end(), dl,
11636 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11637 MIB.addReg(varPhi)
11638 .addImm(UnitSize)
11640 .add(condCodeOp());
11641 MIB->getOperand(5).setReg(ARM::CPSR);
11642 MIB->getOperand(5).setIsDef(true);
11643 }
11644 BuildMI(*BB, BB->end(), dl,
11645 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11646 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11647
11648 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11649 BB->addSuccessor(loopMBB);
11650 BB->addSuccessor(exitMBB);
11651
11652 // Add epilogue to handle BytesLeft.
11653 BB = exitMBB;
11654 auto StartOfExit = exitMBB->begin();
11655
11656 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11657 // [destOut] = STRB_POST(scratch, destLoop, 1)
11658 unsigned srcIn = srcLoop;
11659 unsigned destIn = destLoop;
11660 for (unsigned i = 0; i < BytesLeft; i++) {
11661 Register srcOut = MRI.createVirtualRegister(TRC);
11662 Register destOut = MRI.createVirtualRegister(TRC);
11663 Register scratch = MRI.createVirtualRegister(TRC);
11664 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11665 IsThumb1, IsThumb2);
11666 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11667 IsThumb1, IsThumb2);
11668 srcIn = srcOut;
11669 destIn = destOut;
11670 }
11671
11672 MI.eraseFromParent(); // The instruction is gone now.
11673 return BB;
11674}
11675
11676 MachineBasicBlock *
11677 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11678 MachineBasicBlock *MBB) const {
11680 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11681 DebugLoc DL = MI.getDebugLoc();
11682
11683 assert(Subtarget->isTargetWindows() &&
11684 "__chkstk is only supported on Windows");
11685 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11686
11687 // __chkstk takes the number of words to allocate on the stack in R4, and
11688 // returns the stack adjustment in number of bytes in R4. This will not
11689 // clobber any other registers (other than the obvious lr).
11690 //
11691 // Although, technically, IP should be considered a register which may be
11692 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11693 // thumb-2 environment, so there is no interworking required. As a result, we
11694 // do not expect a veneer to be emitted by the linker, clobbering IP.
11695 //
11696 // Each module receives its own copy of __chkstk, so no import thunk is
11697 // required, again, ensuring that IP is not clobbered.
11698 //
11699 // Finally, although some linkers may theoretically provide a trampoline for
11700 // out of range calls (which is quite common due to a 32M range limitation of
11701 // branches for Thumb), we can generate the long-call version via
11702 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11703 // IP.
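// Illustrative summary of the protocol above (editorial): to extend the stack by
// N bytes, the caller materializes N/4 (a word count) in R4, calls __chkstk, and
// the SP = SP - R4 subtraction emitted below applies the byte adjustment that
// __chkstk returns in R4.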
11704
11705 switch (TM.getCodeModel()) {
11706 case CodeModel::Tiny:
11707 llvm_unreachable("Tiny code model not available on ARM.");
11708 case CodeModel::Small:
11709 case CodeModel::Medium:
11710 case CodeModel::Kernel:
11711 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11713 .addExternalSymbol("__chkstk")
11716 .addReg(ARM::R12,
11718 .addReg(ARM::CPSR,
11720 break;
11721 case CodeModel::Large: {
11723 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11724
11725 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11726 .addExternalSymbol("__chkstk");
11729 .addReg(Reg, RegState::Kill)
11732 .addReg(ARM::R12,
11734 .addReg(ARM::CPSR,
11736 break;
11737 }
11738 }
11739
11740 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11741 .addReg(ARM::SP, RegState::Kill)
11742 .addReg(ARM::R4, RegState::Kill)
11745 .add(condCodeOp());
11746
11747 MI.eraseFromParent();
11748 return MBB;
11749}
11750
11751 MachineBasicBlock *
11752 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11753 MachineBasicBlock *MBB) const {
11754 DebugLoc DL = MI.getDebugLoc();
11755 MachineFunction *MF = MBB->getParent();
11756 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11757
11758 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11759 MF->insert(++MBB->getIterator(), ContBB);
11760 ContBB->splice(ContBB->begin(), MBB,
11761 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11763 MBB->addSuccessor(ContBB);
11764
11765 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11766 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11767 MF->push_back(TrapBB);
11768 MBB->addSuccessor(TrapBB);
11769
11770 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11771 .addReg(MI.getOperand(0).getReg())
11772 .addImm(0)
11774 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11775 .addMBB(TrapBB)
11777 .addReg(ARM::CPSR);
11778
11779 MI.eraseFromParent();
11780 return ContBB;
11781}
11782
11783// The CPSR operand of SelectItr might be missing a kill marker
11784// because there were multiple uses of CPSR, and ISel didn't know
11785// which to mark. Figure out whether SelectItr should have had a
11786// kill marker, and set it if it should. Returns the correct kill
11787// marker value.
11788 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11789 MachineBasicBlock* BB,
11790 const TargetRegisterInfo* TRI) {
11791 // Scan forward through BB for a use/def of CPSR.
11792 MachineBasicBlock::iterator miI(std::next(SelectItr));
11793 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11794 const MachineInstr& mi = *miI;
11795 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11796 return false;
11797 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11798 break; // Should have kill-flag - update below.
11799 }
11800
11801 // If we hit the end of the block, check whether CPSR is live into a
11802 // successor.
11803 if (miI == BB->end()) {
11804 for (MachineBasicBlock *Succ : BB->successors())
11805 if (Succ->isLiveIn(ARM::CPSR))
11806 return false;
11807 }
11808
11809 // We found a def, or hit the end of the basic block and CPSR wasn't live
11810 // out. SelectMI should have a kill flag on CPSR.
11811 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11812 return true;
11813}
11814
11815/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11816/// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop
11817 static Register genTPEntry(MachineBasicBlock *TpEntry,
11818 MachineBasicBlock *TpLoopBody,
11819 MachineBasicBlock *TpExit, Register OpSizeReg,
11820 const TargetInstrInfo *TII, DebugLoc Dl,
11821 MachineRegisterInfo &MRI) {
11822 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
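// Worked example (editorial): n = 35 bytes gives (35 + 15) >> 4 = 3 iterations;
// the MVE_VCTP8 predicate built in genTPLoopBody masks off the 13 inactive byte
// lanes of the final iteration.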
11823 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11824 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11825 .addUse(OpSizeReg)
11826 .addImm(15)
11828 .addReg(0);
11829
11830 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11831 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11832 .addUse(AddDestReg, RegState::Kill)
11833 .addImm(4)
11835 .addReg(0);
11836
11837 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11838 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11839 .addUse(LsrDestReg, RegState::Kill);
11840
11841 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11842 .addUse(TotalIterationsReg)
11843 .addMBB(TpExit);
11844
11845 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11846 .addMBB(TpLoopBody)
11848
11849 return TotalIterationsReg;
11850}
11851
11852/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11853/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11854/// loops.
11855static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11856 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11857 const TargetInstrInfo *TII, DebugLoc Dl,
11858 MachineRegisterInfo &MRI, Register OpSrcReg,
11859 Register OpDestReg, Register ElementCountReg,
11860 Register TotalIterationsReg, bool IsMemcpy) {
11861 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11862 // array, loop iteration counter, predication counter.
11863
11864 Register SrcPhiReg, CurrSrcReg;
11865 if (IsMemcpy) {
11866 // Current position in the src array
11867 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11868 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11869 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11870 .addUse(OpSrcReg)
11871 .addMBB(TpEntry)
11872 .addUse(CurrSrcReg)
11873 .addMBB(TpLoopBody);
11874 }
11875
11876 // Current position in the dest array
11877 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11878 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11879 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11880 .addUse(OpDestReg)
11881 .addMBB(TpEntry)
11882 .addUse(CurrDestReg)
11883 .addMBB(TpLoopBody);
11884
11885 // Current loop counter
11886 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11887 Register RemainingLoopIterationsReg =
11888 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11889 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11890 .addUse(TotalIterationsReg)
11891 .addMBB(TpEntry)
11892 .addUse(RemainingLoopIterationsReg)
11893 .addMBB(TpLoopBody);
11894
11895 // Predication counter
11896 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11897 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11898 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11899 .addUse(ElementCountReg)
11900 .addMBB(TpEntry)
11901 .addUse(RemainingElementsReg)
11902 .addMBB(TpLoopBody);
11903
11904 // Pass predication counter to VCTP
11905 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11906 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11907 .addUse(PredCounterPhiReg)
11909 .addReg(0)
11910 .addReg(0);
11911
11912 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11913 .addUse(PredCounterPhiReg)
11914 .addImm(16)
11916 .addReg(0);
11917
11918 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11919 Register SrcValueReg;
11920 if (IsMemcpy) {
11921 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11922 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11923 .addDef(CurrSrcReg)
11924 .addDef(SrcValueReg)
11925 .addReg(SrcPhiReg)
11926 .addImm(16)
11928 .addUse(VccrReg)
11929 .addReg(0);
11930 } else
11931 SrcValueReg = OpSrcReg;
11932
11933 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11934 .addDef(CurrDestReg)
11935 .addUse(SrcValueReg)
11936 .addReg(DestPhiReg)
11937 .addImm(16)
11939 .addUse(VccrReg)
11940 .addReg(0);
11941
11942 // Add the pseudoInstrs for decrementing the loop counter and marking the
11943 // end:t2DoLoopDec and t2DoLoopEnd
11944 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11945 .addUse(LoopCounterPhiReg)
11946 .addImm(1);
11947
11948 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11949 .addUse(RemainingLoopIterationsReg)
11950 .addMBB(TpLoopBody);
11951
11952 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11953 .addMBB(TpExit)
11955}
11956
11957 MachineBasicBlock *
11958 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11959 MachineBasicBlock *BB) const {
11960 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11961 DebugLoc dl = MI.getDebugLoc();
11962 bool isThumb2 = Subtarget->isThumb2();
11963 switch (MI.getOpcode()) {
11964 default: {
11965 MI.print(errs());
11966 llvm_unreachable("Unexpected instr type to insert");
11967 }
11968
11969 // Thumb1 post-indexed loads are really just single-register LDMs.
11970 case ARM::tLDR_postidx: {
11971 MachineOperand Def(MI.getOperand(1));
11972 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11973 .add(Def) // Rn_wb
11974 .add(MI.getOperand(2)) // Rn
11975 .add(MI.getOperand(3)) // PredImm
11976 .add(MI.getOperand(4)) // PredReg
11977 .add(MI.getOperand(0)) // Rt
11978 .cloneMemRefs(MI);
11979 MI.eraseFromParent();
11980 return BB;
11981 }
11982
11983 case ARM::MVE_MEMCPYLOOPINST:
11984 case ARM::MVE_MEMSETLOOPINST: {
11985
11986 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
11987 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
11988 // the iteration count (ceil(size_in_bytes/16)) in the TP entry block and
11989 // adds the relevant instructions in the TP loop Body for generation of a
11990 // WLSTP loop.
11991
11992 // Below is relevant portion of the CFG after the transformation.
11993 // The Machine Basic Blocks are shown along with branch conditions (in
11994 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
11995 // portion of the CFG and may not necessarily be the entry/exit of the
11996 // function.
11997
11998 // (Relevant) CFG after transformation:
11999 // TP entry MBB
12000 // |
12001 // |-----------------|
12002 // (n <= 0) (n > 0)
12003 // | |
12004 // | TP loop Body MBB<--|
12005 // | | |
12006 // \ |___________|
12007 // \ /
12008 // TP exit MBB
12009
12010 MachineFunction *MF = BB->getParent();
12011 MachineFunctionProperties &Properties = MF->getProperties();
12012 MachineRegisterInfo &MRI = MF->getRegInfo();
12013
12014 Register OpDestReg = MI.getOperand(0).getReg();
12015 Register OpSrcReg = MI.getOperand(1).getReg();
12016 Register OpSizeReg = MI.getOperand(2).getReg();
12017
12018 // Allocate the required MBBs and add to parent function.
12019 MachineBasicBlock *TpEntry = BB;
12020 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12021 MachineBasicBlock *TpExit;
12022
12023 MF->push_back(TpLoopBody);
12024
12025 // If any instructions are present in the current block after
12026 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12027 // move the instructions into the newly created exit block. If there are no
12028 // instructions add an explicit branch to the FallThrough block and then
12029 // split.
12030 //
12031 // The split is required for two reasons:
12032 // 1) A terminator(t2WhileLoopStart) will be placed at that site.
12033 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12034 // need to be updated. splitAt() already handles this.
12035 TpExit = BB->splitAt(MI, false);
12036 if (TpExit == BB) {
12037 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12038 "block containing memcpy/memset Pseudo");
12039 TpExit = BB->getFallThrough();
12040 BuildMI(BB, dl, TII->get(ARM::t2B))
12041 .addMBB(TpExit)
12043 TpExit = BB->splitAt(MI, false);
12044 }
12045
12046 // Add logic for iteration count
12047 Register TotalIterationsReg =
12048 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12049
12050 // Add the vectorized (and predicated) loads/store instructions
12051 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12052 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12053 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12054
12055 // Required to avoid conflict with the MachineVerifier during testing.
12056 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
12057
12058 // Connect the blocks
12059 TpEntry->addSuccessor(TpLoopBody);
12060 TpLoopBody->addSuccessor(TpLoopBody);
12061 TpLoopBody->addSuccessor(TpExit);
12062
12063 // Reorder for a more natural layout
12064 TpLoopBody->moveAfter(TpEntry);
12065 TpExit->moveAfter(TpLoopBody);
12066
12067 // Finally, remove the memcpy Pseudo Instruction
12068 MI.eraseFromParent();
12069
12070 // Return the exit block as it may contain other instructions requiring a
12071 // custom inserter
12072 return TpExit;
12073 }
12074
12075 // The Thumb2 pre-indexed stores have the same MI operands, they just
12076 // define them differently in the .td files from the isel patterns, so
12077 // they need pseudos.
12078 case ARM::t2STR_preidx:
12079 MI.setDesc(TII->get(ARM::t2STR_PRE));
12080 return BB;
12081 case ARM::t2STRB_preidx:
12082 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12083 return BB;
12084 case ARM::t2STRH_preidx:
12085 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12086 return BB;
12087
12088 case ARM::STRi_preidx:
12089 case ARM::STRBi_preidx: {
12090 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12091 : ARM::STRB_PRE_IMM;
12092 // Decode the offset.
12093 unsigned Offset = MI.getOperand(4).getImm();
12094 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12096 if (isSub)
12097 Offset = -Offset;
12098
12099 MachineMemOperand *MMO = *MI.memoperands_begin();
12100 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12101 .add(MI.getOperand(0)) // Rn_wb
12102 .add(MI.getOperand(1)) // Rt
12103 .add(MI.getOperand(2)) // Rn
12104 .addImm(Offset) // offset (skip GPR==zero_reg)
12105 .add(MI.getOperand(5)) // pred
12106 .add(MI.getOperand(6))
12107 .addMemOperand(MMO);
12108 MI.eraseFromParent();
12109 return BB;
12110 }
12111 case ARM::STRr_preidx:
12112 case ARM::STRBr_preidx:
12113 case ARM::STRH_preidx: {
12114 unsigned NewOpc;
12115 switch (MI.getOpcode()) {
12116 default: llvm_unreachable("unexpected opcode!");
12117 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12118 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12119 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12120 }
12121 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12122 for (const MachineOperand &MO : MI.operands())
12123 MIB.add(MO);
12124 MI.eraseFromParent();
12125 return BB;
12126 }
12127
12128 case ARM::tMOVCCr_pseudo: {
12129 // To "insert" a SELECT_CC instruction, we actually have to insert the
12130 // diamond control-flow pattern. The incoming instruction knows the
12131 // destination vreg to set, the condition code register to branch on, the
12132 // true/false values to select between, and a branch opcode to use.
12133 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12134 MachineFunction::iterator It = ++BB->getIterator();
12135
12136 // thisMBB:
12137 // ...
12138 // TrueVal = ...
12139 // cmpTY ccX, r1, r2
12140 // bCC copy1MBB
12141 // fallthrough --> copy0MBB
12142 MachineBasicBlock *thisMBB = BB;
12143 MachineFunction *F = BB->getParent();
12144 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12145 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12146 F->insert(It, copy0MBB);
12147 F->insert(It, sinkMBB);
12148
12149 // Set the call frame size on entry to the new basic blocks.
12150 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12151 copy0MBB->setCallFrameSize(CallFrameSize);
12152 sinkMBB->setCallFrameSize(CallFrameSize);
12153
12154 // Check whether CPSR is live past the tMOVCCr_pseudo.
12155 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12156 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12157 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12158 copy0MBB->addLiveIn(ARM::CPSR);
12159 sinkMBB->addLiveIn(ARM::CPSR);
12160 }
12161
12162 // Transfer the remainder of BB and its successor edges to sinkMBB.
12163 sinkMBB->splice(sinkMBB->begin(), BB,
12164 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12165 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12166
12167 BB->addSuccessor(copy0MBB);
12168 BB->addSuccessor(sinkMBB);
12169
12170 BuildMI(BB, dl, TII->get(ARM::tBcc))
12171 .addMBB(sinkMBB)
12172 .addImm(MI.getOperand(3).getImm())
12173 .addReg(MI.getOperand(4).getReg());
12174
12175 // copy0MBB:
12176 // %FalseValue = ...
12177 // # fallthrough to sinkMBB
12178 BB = copy0MBB;
12179
12180 // Update machine-CFG edges
12181 BB->addSuccessor(sinkMBB);
12182
12183 // sinkMBB:
12184 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12185 // ...
12186 BB = sinkMBB;
12187 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12188 .addReg(MI.getOperand(1).getReg())
12189 .addMBB(copy0MBB)
12190 .addReg(MI.getOperand(2).getReg())
12191 .addMBB(thisMBB);
12192
12193 MI.eraseFromParent(); // The pseudo instruction is gone now.
12194 return BB;
12195 }
12196
12197 case ARM::BCCi64:
12198 case ARM::BCCZi64: {
12199 // If there is an unconditional branch to the other successor, remove it.
12200 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12201
12202 // Compare both parts that make up the double comparison separately for
12203 // equality.
12204 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12205
12206 Register LHS1 = MI.getOperand(1).getReg();
12207 Register LHS2 = MI.getOperand(2).getReg();
12208 if (RHSisZero) {
12209 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12210 .addReg(LHS1)
12211 .addImm(0)
12213 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12214 .addReg(LHS2).addImm(0)
12215 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12216 } else {
12217 Register RHS1 = MI.getOperand(3).getReg();
12218 Register RHS2 = MI.getOperand(4).getReg();
12219 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12220 .addReg(LHS1)
12221 .addReg(RHS1)
12223 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12224 .addReg(LHS2).addReg(RHS2)
12225 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12226 }
12227
12228 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12229 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12230 if (MI.getOperand(0).getImm() == ARMCC::NE)
12231 std::swap(destMBB, exitMBB);
12232
12233 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12234 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12235 if (isThumb2)
12236 BuildMI(BB, dl, TII->get(ARM::t2B))
12237 .addMBB(exitMBB)
12239 else
12240 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12241
12242 MI.eraseFromParent(); // The pseudo instruction is gone now.
12243 return BB;
12244 }
12245
12246 case ARM::Int_eh_sjlj_setjmp:
12247 case ARM::Int_eh_sjlj_setjmp_nofp:
12248 case ARM::tInt_eh_sjlj_setjmp:
12249 case ARM::t2Int_eh_sjlj_setjmp:
12250 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12251 return BB;
12252
12253 case ARM::Int_eh_sjlj_setup_dispatch:
12254 EmitSjLjDispatchBlock(MI, BB);
12255 return BB;
12256
12257 case ARM::ABS:
12258 case ARM::t2ABS: {
12259 // To insert an ABS instruction, we have to insert the
12260 // diamond control-flow pattern. The incoming instruction knows the
12261 // source vreg to test against 0, the destination vreg to set,
12262 // the condition code register to branch on, the
12263 // true/false values to select between, and a branch opcode to use.
12264 // It transforms
12265 // V1 = ABS V0
12266 // into
12267 // V2 = MOVS V0
12268 // BCC (branch to SinkBB if V0 >= 0)
12269 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12270 // SinkBB: V1 = PHI(V2, V3)
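// Worked example (editorial): for V0 = -5 the CMP sets the N flag, the PL branch
// below is not taken, RSBBB computes 0 - (-5) = 5, and the PHI in SinkBB selects
// that result; for V0 >= 0 the branch skips RSBBB and the PHI selects V0 itself.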
12271 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12272 MachineFunction::iterator BBI = ++BB->getIterator();
12273 MachineFunction *Fn = BB->getParent();
12274 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12275 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12276 Fn->insert(BBI, RSBBB);
12277 Fn->insert(BBI, SinkBB);
12278
12279 Register ABSSrcReg = MI.getOperand(1).getReg();
12280 Register ABSDstReg = MI.getOperand(0).getReg();
12281 bool ABSSrcKIll = MI.getOperand(1).isKill();
12282 bool isThumb2 = Subtarget->isThumb2();
12283 MachineRegisterInfo &MRI = Fn->getRegInfo();
12284 // In Thumb mode S must not be specified if source register is the SP or
12285 // PC and if destination register is the SP, so restrict register class
12286 Register NewRsbDstReg = MRI.createVirtualRegister(
12287 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12288
12289 // Transfer the remainder of BB and its successor edges to sinkMBB.
12290 SinkBB->splice(SinkBB->begin(), BB,
12291 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12292 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12293
12294 BB->addSuccessor(RSBBB);
12295 BB->addSuccessor(SinkBB);
12296
12297 // fall through to SinkMBB
12298 RSBBB->addSuccessor(SinkBB);
12299
12300 // insert a cmp at the end of BB
12301 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12302 .addReg(ABSSrcReg)
12303 .addImm(0)
12305
12306 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12307 BuildMI(BB, dl,
12308 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12310
12311 // insert rsbri in RSBBB
12312 // Note: BCC and rsbri will be converted into predicated rsbmi
12313 // by if-conversion pass
12314 BuildMI(*RSBBB, RSBBB->begin(), dl,
12315 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12316 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12317 .addImm(0)
12319 .add(condCodeOp());
12320
12321 // insert PHI in SinkBB,
12322 // reuse ABSDstReg to not change uses of ABS instruction
12323 BuildMI(*SinkBB, SinkBB->begin(), dl,
12324 TII->get(ARM::PHI), ABSDstReg)
12325 .addReg(NewRsbDstReg).addMBB(RSBBB)
12326 .addReg(ABSSrcReg).addMBB(BB);
12327
12328 // remove ABS instruction
12329 MI.eraseFromParent();
12330
12331 // return last added BB
12332 return SinkBB;
12333 }
12334 case ARM::COPY_STRUCT_BYVAL_I32:
12335 ++NumLoopByVals;
12336 return EmitStructByval(MI, BB);
12337 case ARM::WIN__CHKSTK:
12338 return EmitLowered__chkstk(MI, BB);
12339 case ARM::WIN__DBZCHK:
12340 return EmitLowered__dbzchk(MI, BB);
12341 }
12342}
12343
12344/// Attaches vregs to MEMCPY that it will use as scratch registers
12345/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12346/// instead of as a custom inserter because we need the use list from the SDNode.
12347static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12348 MachineInstr &MI, const SDNode *Node) {
12349 bool isThumb1 = Subtarget->isThumb1Only();
12350
12351 DebugLoc DL = MI.getDebugLoc();
12352 MachineFunction *MF = MI.getParent()->getParent();
12353 MachineRegisterInfo &MRI = MF->getRegInfo();
12354 MachineInstrBuilder MIB(*MF, MI);
12355
12356 // If the new dst/src is unused mark it as dead.
12357 if (!Node->hasAnyUseOfValue(0)) {
12358 MI.getOperand(0).setIsDead(true);
12359 }
12360 if (!Node->hasAnyUseOfValue(1)) {
12361 MI.getOperand(1).setIsDead(true);
12362 }
12363
12364 // The MEMCPY both defines and kills the scratch registers.
12365 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12366 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12367 : &ARM::GPRRegClass);
12369 }
12370}
12371
12372 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12373 SDNode *Node) const {
12374 if (MI.getOpcode() == ARM::MEMCPY) {
12375 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12376 return;
12377 }
12378
12379 const MCInstrDesc *MCID = &MI.getDesc();
12380 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12381 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12382 // operand is still set to noreg. If needed, set the optional operand's
12383 // register to CPSR, and remove the redundant implicit def.
12384 //
12385 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12386
12387 // Rename pseudo opcodes.
12388 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12389 unsigned ccOutIdx;
12390 if (NewOpc) {
12391 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12392 MCID = &TII->get(NewOpc);
12393
12394 assert(MCID->getNumOperands() ==
12395 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12396 && "converted opcode should be the same except for cc_out"
12397 " (and, on Thumb1, pred)");
12398
12399 MI.setDesc(*MCID);
12400
12401 // Add the optional cc_out operand
12402 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12403
12404 // On Thumb1, move all input operands to the end, then add the predicate
12405 if (Subtarget->isThumb1Only()) {
12406 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12407 MI.addOperand(MI.getOperand(1));
12408 MI.removeOperand(1);
12409 }
12410
12411 // Restore the ties
12412 for (unsigned i = MI.getNumOperands(); i--;) {
12413 const MachineOperand& op = MI.getOperand(i);
12414 if (op.isReg() && op.isUse()) {
12415 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12416 if (DefIdx != -1)
12417 MI.tieOperands(DefIdx, i);
12418 }
12419 }
12420
12421 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12422 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12423 ccOutIdx = 1;
12424 } else
12425 ccOutIdx = MCID->getNumOperands() - 1;
12426 } else
12427 ccOutIdx = MCID->getNumOperands() - 1;
12428
12429 // Any ARM instruction that sets the 's' bit should specify an optional
12430 // "cc_out" operand in the last operand position.
12431 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12432 assert(!NewOpc && "Optional cc_out operand required");
12433 return;
12434 }
12435 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12436 // since we already have an optional CPSR def.
12437 bool definesCPSR = false;
12438 bool deadCPSR = false;
12439 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12440 ++i) {
12441 const MachineOperand &MO = MI.getOperand(i);
12442 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12443 definesCPSR = true;
12444 if (MO.isDead())
12445 deadCPSR = true;
12446 MI.removeOperand(i);
12447 break;
12448 }
12449 }
12450 if (!definesCPSR) {
12451 assert(!NewOpc && "Optional cc_out operand required");
12452 return;
12453 }
12454 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12455 if (deadCPSR) {
12456 assert(!MI.getOperand(ccOutIdx).getReg() &&
12457 "expect uninitialized optional cc_out operand");
12458 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12459 if (!Subtarget->isThumb1Only())
12460 return;
12461 }
12462
12463 // If this instruction was defined with an optional CPSR def and its dag node
12464 // had a live implicit CPSR def, then activate the optional CPSR def.
12465 MachineOperand &MO = MI.getOperand(ccOutIdx);
12466 MO.setReg(ARM::CPSR);
12467 MO.setIsDef(true);
12468}
12469
12470//===----------------------------------------------------------------------===//
12471// ARM Optimization Hooks
12472//===----------------------------------------------------------------------===//
12473
12474// Helper function that checks if N is a null or all ones constant.
12475static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12477}
12478
12479// Return true if N is conditionally 0 or all ones.
12480// Detects these expressions where cc is an i1 value:
12481//
12482// (select cc 0, y) [AllOnes=0]
12483// (select cc y, 0) [AllOnes=0]
12484// (zext cc) [AllOnes=0]
12485// (sext cc) [AllOnes=0/1]
12486// (select cc -1, y) [AllOnes=1]
12487// (select cc y, -1) [AllOnes=1]
12488//
12489// Invert is set when N is the null/all ones constant when CC is false.
12490// OtherOp is set to the alternative value of N.
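// Illustrative cases (editorial): N = (select cc, 0, y) yields CC = cc,
// Invert = false, OtherOp = y; N = (zext cc) with AllOnes = 0 yields
// Invert = true and OtherOp = 1 (the value N takes when cc is true).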
12491 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12492 SDValue &CC, bool &Invert,
12493 SDValue &OtherOp,
12494 SelectionDAG &DAG) {
12495 switch (N->getOpcode()) {
12496 default: return false;
12497 case ISD::SELECT: {
12498 CC = N->getOperand(0);
12499 SDValue N1 = N->getOperand(1);
12500 SDValue N2 = N->getOperand(2);
12501 if (isZeroOrAllOnes(N1, AllOnes)) {
12502 Invert = false;
12503 OtherOp = N2;
12504 return true;
12505 }
12506 if (isZeroOrAllOnes(N2, AllOnes)) {
12507 Invert = true;
12508 OtherOp = N1;
12509 return true;
12510 }
12511 return false;
12512 }
12513 case ISD::ZERO_EXTEND:
12514 // (zext cc) can never be the all ones value.
12515 if (AllOnes)
12516 return false;
12517 [[fallthrough]];
12518 case ISD::SIGN_EXTEND: {
12519 SDLoc dl(N);
12520 EVT VT = N->getValueType(0);
12521 CC = N->getOperand(0);
12522 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12523 return false;
12524 Invert = !AllOnes;
12525 if (AllOnes)
12526 // When looking for an AllOnes constant, N is an sext, and the 'other'
12527 // value is 0.
12528 OtherOp = DAG.getConstant(0, dl, VT);
12529 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12530 // When looking for a 0 constant, N can be zext or sext.
12531 OtherOp = DAG.getConstant(1, dl, VT);
12532 else
12533 OtherOp = DAG.getAllOnesConstant(dl, VT);
12534 return true;
12535 }
12536 }
12537}
12538
12539// Combine a constant select operand into its use:
12540//
12541// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12542// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12543// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12544// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12545// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12546//
12547// The transform is rejected if the select doesn't have a constant operand that
12548// is null, or all ones when AllOnes is set.
12549//
12550// Also recognize sext/zext from i1:
12551//
12552// (add (zext cc), x) -> (select cc (add x, 1), x)
12553// (add (sext cc), x) -> (select cc (add x, -1), x)
12554//
12555// These transformations eventually create predicated instructions.
12556//
12557// @param N The node to transform.
12558// @param Slct The N operand that is a select.
12559// @param OtherOp The other N operand (x above).
12560// @param DCI Context.
12561// @param AllOnes Require the select constant to be all ones instead of null.
12562// @returns The new node, or SDValue() on failure.
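// Illustrative example (editorial): for (add (select cc, 0, 7), x) the select is
// the identity constant 0 when cc is true, so the result is
// (select cc, x, (add x, 7)), which later becomes a predicated add.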
12563static
12564 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12565 TargetLowering::DAGCombinerInfo &DCI,
12566 bool AllOnes = false) {
12567 SelectionDAG &DAG = DCI.DAG;
12568 EVT VT = N->getValueType(0);
12569 SDValue NonConstantVal;
12570 SDValue CCOp;
12571 bool SwapSelectOps;
12572 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12573 NonConstantVal, DAG))
12574 return SDValue();
12575
12576 // Slct is now known to be the desired identity constant when CC is true.
12577 SDValue TrueVal = OtherOp;
12578 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12579 OtherOp, NonConstantVal);
12580 // Unless SwapSelectOps says CC should be false.
12581 if (SwapSelectOps)
12582 std::swap(TrueVal, FalseVal);
12583
12584 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12585 CCOp, TrueVal, FalseVal);
12586}
12587
12588// Attempt combineSelectAndUse on each operand of a commutative operator N.
12589static
12590 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12591 TargetLowering::DAGCombinerInfo &DCI) {
12592 SDValue N0 = N->getOperand(0);
12593 SDValue N1 = N->getOperand(1);
12594 if (N0.getNode()->hasOneUse())
12595 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12596 return Result;
12597 if (N1.getNode()->hasOneUse())
12598 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12599 return Result;
12600 return SDValue();
12601}
12602
12603 static bool IsVUZPShuffleNode(SDNode *N) {
12604 // VUZP shuffle node.
12605 if (N->getOpcode() == ARMISD::VUZP)
12606 return true;
12607
12608 // "VUZP" on i32 is an alias for VTRN.
12609 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12610 return true;
12611
12612 return false;
12613}
12614
12615 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12616 TargetLowering::DAGCombinerInfo &DCI,
12617 const ARMSubtarget *Subtarget) {
12618 // Look for ADD(VUZP.0, VUZP.1).
12619 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12620 N0 == N1)
12621 return SDValue();
12622
12623 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12624 if (!N->getValueType(0).is64BitVector())
12625 return SDValue();
12626
12627 // Generate vpadd.
12628 SelectionDAG &DAG = DCI.DAG;
12629 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12630 SDLoc dl(N);
12631 SDNode *Unzip = N0.getNode();
12632 EVT VT = N->getValueType(0);
12633
12634 SmallVector<SDValue, 8> Ops;
12635 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12636 TLI.getPointerTy(DAG.getDataLayout())));
12637 Ops.push_back(Unzip->getOperand(0));
12638 Ops.push_back(Unzip->getOperand(1));
12639
12640 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12641}
12642
12643 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12644 TargetLowering::DAGCombinerInfo &DCI,
12645 const ARMSubtarget *Subtarget) {
12646 // Check for two extended operands.
12647 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12648 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12649 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12650 N1.getOpcode() == ISD::ZERO_EXTEND))
12651 return SDValue();
12652
12653 SDValue N00 = N0.getOperand(0);
12654 SDValue N10 = N1.getOperand(0);
12655
12656 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12657 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12658 N00 == N10)
12659 return SDValue();
12660
12661 // We only recognize Q register paddl here; this can't be reached until
12662 // after type legalization.
12663 if (!N00.getValueType().is64BitVector() ||
12665 return SDValue();
12666
12667 // Generate vpaddl.
12668 SelectionDAG &DAG = DCI.DAG;
12669 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12670 SDLoc dl(N);
12671 EVT VT = N->getValueType(0);
12672
12673 SmallVector<SDValue, 8> Ops;
12674 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12675 unsigned Opcode;
12676 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12677 Opcode = Intrinsic::arm_neon_vpaddls;
12678 else
12679 Opcode = Intrinsic::arm_neon_vpaddlu;
12680 Ops.push_back(DAG.getConstant(Opcode, dl,
12681 TLI.getPointerTy(DAG.getDataLayout())));
12682 EVT ElemTy = N00.getValueType().getVectorElementType();
12683 unsigned NumElts = VT.getVectorNumElements();
12684 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12685 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12686 N00.getOperand(0), N00.getOperand(1));
12687 Ops.push_back(Concat);
12688
12689 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12690}
12691
12692// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12693// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12694// much easier to match.
12695static SDValue
12696 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12697 TargetLowering::DAGCombinerInfo &DCI,
12698 const ARMSubtarget *Subtarget) {
12699 // Only perform this optimization after legalization, and only if NEON is available. We
12700 // also expect both operands to be BUILD_VECTORs.
12701 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12702 || N0.getOpcode() != ISD::BUILD_VECTOR
12703 || N1.getOpcode() != ISD::BUILD_VECTOR)
12704 return SDValue();
12705
12706 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12707 EVT VT = N->getValueType(0);
12708 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12709 return SDValue();
12710
12711 // Check that the vector operands are of the right form.
12712 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12713 // operands, where N is the size of the formed vector.
12714 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12715 // index such that we have a pairwise add pattern.
12716
12717 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12718 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12719 return SDValue();
12720 SDValue Vec = N0->getOperand(0)->getOperand(0);
12721 SDNode *V = Vec.getNode();
12722 unsigned nextIndex = 0;
12723
12724 // For each operands to the ADD which are BUILD_VECTORs,
12725 // check to see if each of their operands are an EXTRACT_VECTOR with
12726 // the same vector and appropriate index.
12727 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12728 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12729 N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12730
12731 SDValue ExtVec0 = N0->getOperand(i);
12732 SDValue ExtVec1 = N1->getOperand(i);
12733
12734 // First operand is the vector; verify it's the same.
12735 if (V != ExtVec0->getOperand(0).getNode() ||
12736 V != ExtVec1->getOperand(0).getNode())
12737 return SDValue();
12738
12739 // Second is the constant; verify it's correct.
12740 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12741 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12742
12743 // For the constant, we want to see all the even or all the odd.
12744 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12745 || C1->getZExtValue() != nextIndex+1)
12746 return SDValue();
12747
12748 // Increment index.
12749 nextIndex+=2;
12750 } else
12751 return SDValue();
12752 }
12753
12754 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12755 // we're using the entire input vector, otherwise there's a size/legality
12756 // mismatch somewhere.
12757 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12759 return SDValue();
12760
12761 // Create VPADDL node.
12762 SelectionDAG &DAG = DCI.DAG;
12763 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12764
12765 SDLoc dl(N);
12766
12767 // Build operand list.
12768 SmallVector<SDValue, 8> Ops;
12769 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12770 TLI.getPointerTy(DAG.getDataLayout())));
12771
12772 // Input is the vector.
12773 Ops.push_back(Vec);
12774
12775 // Get widened type and narrowed type.
12776 MVT widenType;
12777 unsigned numElem = VT.getVectorNumElements();
12778
12779 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12780 switch (inputLaneType.getSimpleVT().SimpleTy) {
12781 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12782 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12783 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12784 default:
12785 llvm_unreachable("Invalid vector element type for padd optimization.");
12786 }
12787
12788 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12789 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12790 return DAG.getNode(ExtOp, dl, VT, tmp);
12791}
12792
12793 static SDValue findMUL_LOHI(SDValue V) {
12794 if (V->getOpcode() == ISD::UMUL_LOHI ||
12795 V->getOpcode() == ISD::SMUL_LOHI)
12796 return V;
12797 return SDValue();
12798}
12799
12800static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12801 TargetLowering::DAGCombinerInfo &DCI,
12802 const ARMSubtarget *Subtarget) {
12803 if (!Subtarget->hasBaseDSP())
12804 return SDValue();
12805
12806 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12807 // accumulates the product into a 64-bit value. The 16-bit values will
12808 // be sign extended somehow or SRA'd into 32-bit values
12809 // (addc (adde (mul 16bit, 16bit), lo), hi)
12810 SDValue Mul = AddcNode->getOperand(0);
12811 SDValue Lo = AddcNode->getOperand(1);
12812 if (Mul.getOpcode() != ISD::MUL) {
12813 Lo = AddcNode->getOperand(0);
12814 Mul = AddcNode->getOperand(1);
12815 if (Mul.getOpcode() != ISD::MUL)
12816 return SDValue();
12817 }
12818
12819 SDValue SRA = AddeNode->getOperand(0);
12820 SDValue Hi = AddeNode->getOperand(1);
12821 if (SRA.getOpcode() != ISD::SRA) {
12822 SRA = AddeNode->getOperand(1);
12823 Hi = AddeNode->getOperand(0);
12824 if (SRA.getOpcode() != ISD::SRA)
12825 return SDValue();
12826 }
12827 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12828 if (Const->getZExtValue() != 31)
12829 return SDValue();
12830 } else
12831 return SDValue();
12832
12833 if (SRA.getOperand(0) != Mul)
12834 return SDValue();
12835
12836 SelectionDAG &DAG = DCI.DAG;
12837 SDLoc dl(AddcNode);
12838 unsigned Opcode = 0;
12839 SDValue Op0;
12840 SDValue Op1;
12841
12842 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12843 Opcode = ARMISD::SMLALBB;
12844 Op0 = Mul.getOperand(0);
12845 Op1 = Mul.getOperand(1);
12846 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12847 Opcode = ARMISD::SMLALBT;
12848 Op0 = Mul.getOperand(0);
12849 Op1 = Mul.getOperand(1).getOperand(0);
12850 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12851 Opcode = ARMISD::SMLALTB;
12852 Op0 = Mul.getOperand(0).getOperand(0);
12853 Op1 = Mul.getOperand(1);
12854 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12855 Opcode = ARMISD::SMLALTT;
12856 Op0 = Mul->getOperand(0).getOperand(0);
12857 Op1 = Mul->getOperand(1).getOperand(0);
12858 }
12859
12860 if (!Op0 || !Op1)
12861 return SDValue();
12862
12863 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12864 Op0, Op1, Lo, Hi);
12865 // Replace the ADDs' nodes uses by the MLA node's values.
12866 SDValue HiMLALResult(SMLAL.getNode(), 1);
12867 SDValue LoMLALResult(SMLAL.getNode(), 0);
12868
12869 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12870 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12871
12872 // Return original node to notify the driver to stop replacing.
12873 SDValue resNode(AddcNode, 0);
12874 return resNode;
12875}
12876
12877 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12878 TargetLowering::DAGCombinerInfo &DCI,
12879 const ARMSubtarget *Subtarget) {
12880 // Look for multiply add opportunities.
12881 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
12882 // each add node consumes a value from ISD::UMUL_LOHI and there is
12883 // a glue link from the first add to the second add.
12884 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12885 // a S/UMLAL instruction.
12886 // UMUL_LOHI
12887 // / :lo \ :hi
12888 // V \ [no multiline comment]
12889 // loAdd -> ADDC |
12890 // \ :carry /
12891 // V V
12892 // ADDE <- hiAdd
12893 //
12894 // In the special case where only the higher part of a signed result is used
12895 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12896 // a constant with the exact value of 0x80000000, we recognize we are dealing
12897 // with a "rounded multiply and add" (or subtract) and transform it into
12898 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
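// In C terms (editorial note), the basic pattern matched here is
// acc64 += (int64_t)a * b (or the unsigned equivalent), where {Lo, Hi} are the
// two halves of acc64; it maps onto a single SMLAL/UMLAL.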
12899
12900 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12901 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12902 "Expect an ADDE or SUBE");
12903
12904 assert(AddeSubeNode->getNumOperands() == 3 &&
12905 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12906 "ADDE node has the wrong inputs");
12907
12908 // Check that we are chained to the right ADDC or SUBC node.
12909 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12910 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12911 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12912 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12913 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12914 return SDValue();
12915
12916 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12917 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12918
12919 // Check if the two operands are from the same mul_lohi node.
12920 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12921 return SDValue();
12922
12923 assert(AddcSubcNode->getNumValues() == 2 &&
12924 AddcSubcNode->getValueType(0) == MVT::i32 &&
12925 "Expect ADDC with two result values. First: i32");
12926
12927 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12928 // may be an SMLAL which multiplies two 16-bit values.
12929 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12930 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12931 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12932 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12933 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12934 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12935
12936 // Check for the triangle shape.
12937 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12938 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12939
12940 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12941 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12942 return SDValue();
12943
12944 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12945 bool IsLeftOperandMUL = false;
12946 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12947 if (MULOp == SDValue())
12948 MULOp = findMUL_LOHI(AddeSubeOp1);
12949 else
12950 IsLeftOperandMUL = true;
12951 if (MULOp == SDValue())
12952 return SDValue();
12953
12954 // Figure out the right opcode.
12955 unsigned Opc = MULOp->getOpcode();
12956 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12957
12958 // Figure out the high and low input values to the MLAL node.
12959 SDValue *HiAddSub = nullptr;
12960 SDValue *LoMul = nullptr;
12961 SDValue *LowAddSub = nullptr;
12962
12963 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12964 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12965 return SDValue();
12966
12967 if (IsLeftOperandMUL)
12968 HiAddSub = &AddeSubeOp1;
12969 else
12970 HiAddSub = &AddeSubeOp0;
12971
12972 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
12973 // whose low result is fed to the ADDC/SUBC we are checking.
12974
12975 if (AddcSubcOp0 == MULOp.getValue(0)) {
12976 LoMul = &AddcSubcOp0;
12977 LowAddSub = &AddcSubcOp1;
12978 }
12979 if (AddcSubcOp1 == MULOp.getValue(0)) {
12980 LoMul = &AddcSubcOp1;
12981 LowAddSub = &AddcSubcOp0;
12982 }
12983
12984 if (!LoMul)
12985 return SDValue();
12986
12987 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12988 // the replacement below will create a cycle.
12989 if (AddcSubcNode == HiAddSub->getNode() ||
12990 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12991 return SDValue();
12992
12993 // Create the merged node.
12994 SelectionDAG &DAG = DCI.DAG;
12995
12996 // Start building operand list.
12997 SmallVector<SDValue, 8> Ops;
12998 Ops.push_back(LoMul->getOperand(0));
12999 Ops.push_back(LoMul->getOperand(1));
13000
13001 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13002 // the case, we must be doing signed multiplication and only use the higher
13003 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
13004 // addition or subtraction with the value 0x80000000.
13005 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13006 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13007 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13008 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13009 0x80000000) {
13010 Ops.push_back(*HiAddSub);
13011 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13012 FinalOpc = ARMISD::SMMLSR;
13013 } else {
13014 FinalOpc = ARMISD::SMMLAR;
13015 }
13016 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13017 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13018
13019 return SDValue(AddeSubeNode, 0);
13020 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13021 // SMMLS is generated during instruction selection and the rest of this
13022 // function cannot handle the case where AddcSubcNode is a SUBC.
13023 return SDValue();
13024
13025 // Finish building the operand list for {U/S}MLAL
13026 Ops.push_back(*LowAddSub);
13027 Ops.push_back(*HiAddSub);
13028
13029 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13030 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13031
13032 // Replace the ADDs' nodes uses by the MLA node's values.
13033 SDValue HiMLALResult(MLALNode.getNode(), 1);
13034 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13035
13036 SDValue LoMLALResult(MLALNode.getNode(), 0);
13037 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13038
13039 // Return original node to notify the driver to stop replacing.
13040 return SDValue(AddeSubeNode, 0);
13041}
13042
13043 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13044 TargetLowering::DAGCombinerInfo &DCI,
13045 const ARMSubtarget *Subtarget) {
13046 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13047 // While trying to combine for the other MLAL nodes, first search for the
13048 // chance to use UMAAL. Check if Addc uses a node which has already
13049 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13050 // as the addend, and it's handled in PerformUMLALCombine.
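// For reference (editorial note): UMAAL computes RdLo:RdHi = Rn * Rm + RdLo + RdHi,
// i.e. a 32x32->64 multiply that accumulates two independent 32-bit values,
// which is why the existing UMLAL's high addend is required to be zero below.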
13051
13052 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13053 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13054
13055 // Check that we have a glued ADDC node.
13056 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13057 if (AddcNode->getOpcode() != ARMISD::ADDC)
13058 return SDValue();
13059
13060 // Find the converted UMAAL or quit if it doesn't exist.
13061 SDNode *UmlalNode = nullptr;
13062 SDValue AddHi;
13063 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13064 UmlalNode = AddcNode->getOperand(0).getNode();
13065 AddHi = AddcNode->getOperand(1);
13066 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13067 UmlalNode = AddcNode->getOperand(1).getNode();
13068 AddHi = AddcNode->getOperand(0);
13069 } else {
13070 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13071 }
13072
13073 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13074 // the ADDC as well as Zero.
13075 if (!isNullConstant(UmlalNode->getOperand(3)))
13076 return SDValue();
13077
13078 if ((isNullConstant(AddeNode->getOperand(0)) &&
13079 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13080 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13081 isNullConstant(AddeNode->getOperand(1)))) {
13082 SelectionDAG &DAG = DCI.DAG;
13083 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13084 UmlalNode->getOperand(2), AddHi };
13085 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13086 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13087
13088 // Replace the ADD nodes' uses with the UMAAL node's values.
13089 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13090 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13091
13092 // Return original node to notify the driver to stop replacing.
13093 return SDValue(AddeNode, 0);
13094 }
13095 return SDValue();
13096}
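// Editor's aside (not part of ARMISelLowering.cpp): a small sketch of the
// UMAAL identity behind this combine. An unsigned 32x32->64 multiply plus two
// 32-bit addends can never overflow 64 bits, since
// (2^32-1)^2 + 2*(2^32-1) == 2^64-1. The helper name is invented.
#include <cassert>
#include <cstdint>

// Reference for UMAAL: RdHi:RdLo = Rn * Rm + RdHi + RdLo.
uint64_t umaal_reference(uint32_t n, uint32_t m, uint32_t hi, uint32_t lo) {
  return (uint64_t)n * m + hi + lo;
}

void umaal_demo() {
  const uint32_t Max = 0xffffffffu;
  // Even the worst case fits exactly in 64 bits, so no carry out is lost.
  assert(umaal_reference(Max, Max, Max, Max) == 0xffffffffffffffffULL);
}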
13097
13098 static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13099 const ARMSubtarget *Subtarget) {
13100 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13101 return SDValue();
13102
13103 // Check that we have a pair of ADDC and ADDE as operands.
13104 // Both addends of the ADDE must be zero.
13105 SDNode* AddcNode = N->getOperand(2).getNode();
13106 SDNode* AddeNode = N->getOperand(3).getNode();
13107 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13108 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13109 isNullConstant(AddeNode->getOperand(0)) &&
13110 isNullConstant(AddeNode->getOperand(1)) &&
13111 (AddeNode->getOperand(2).getNode() == AddcNode))
13112 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13113 DAG.getVTList(MVT::i32, MVT::i32),
13114 {N->getOperand(0), N->getOperand(1),
13115 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13116 else
13117 return SDValue();
13118}
13119
13120 static SDValue PerformAddcSubcCombine(SDNode *N,
13121 TargetLowering::DAGCombinerInfo &DCI,
13122 const ARMSubtarget *Subtarget) {
13123 SelectionDAG &DAG(DCI.DAG);
13124
13125 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13126 // (SUBC (ADDE 0, 0, C), 1) -> C
13127 SDValue LHS = N->getOperand(0);
13128 SDValue RHS = N->getOperand(1);
13129 if (LHS->getOpcode() == ARMISD::ADDE &&
13130 isNullConstant(LHS->getOperand(0)) &&
13131 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13132 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13133 }
13134 }
13135
13136 if (Subtarget->isThumb1Only()) {
13137 SDValue RHS = N->getOperand(1);
13138 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13139 int32_t imm = C->getSExtValue();
13140 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13141 SDLoc DL(N);
13142 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13143 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13144 : ARMISD::ADDC;
13145 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13146 }
13147 }
13148 }
13149
13150 return SDValue();
13151}
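// Editor's aside (not part of ARMISelLowering.cpp): a hedged sketch of the
// Thumb1 rewrite above. Adding a negative immediate produces the same 32-bit
// result as subtracting its negation, and the subtract form usually has an
// encodable (small, positive) immediate. The function name is invented.
#include <cassert>
#include <cstdint>

void addc_negated_imm_demo() {
  uint32_t x = 12345u;
  int32_t imm = -100;                      // negative, but not INT32_MIN
  assert(x + (uint32_t)imm == x - (uint32_t)(-imm));
}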
13152
13153 static SDValue PerformAddeSubeCombine(SDNode *N,
13154 TargetLowering::DAGCombinerInfo &DCI,
13155 const ARMSubtarget *Subtarget) {
13156 if (Subtarget->isThumb1Only()) {
13157 SelectionDAG &DAG = DCI.DAG;
13158 SDValue RHS = N->getOperand(1);
13159 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13160 int64_t imm = C->getSExtValue();
13161 if (imm < 0) {
13162 SDLoc DL(N);
13163
13164 // The with-carry-in form matches bitwise not instead of the negation.
13165 // Effectively, the inverse interpretation of the carry flag already
13166 // accounts for part of the negation.
13167 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13168
13169 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13170 : ARMISD::ADDE;
13171 return DAG.getNode(Opcode, DL, N->getVTList(),
13172 N->getOperand(0), RHS, N->getOperand(2));
13173 }
13174 }
13175 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13176 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13177 }
13178 return SDValue();
13179}
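// Editor's aside (not part of ARMISelLowering.cpp): a sketch of the carry-in
// identity used above. With ARM's subtract-with-carry convention
// (a - b - (1 - C)), replacing a negative immediate by its bitwise NOT gives
// the same result for either carry value, because -(~imm) - 1 == imm.
// The function name is invented.
#include <cassert>
#include <cstdint>

void adde_to_sube_demo() {
  uint32_t a = 0xdeadbeefu;
  int32_t imm = -42;
  for (uint32_t carry = 0; carry <= 1; ++carry) {
    uint32_t adde = a + (uint32_t)imm + carry;           // ADDE a, imm, C
    uint32_t sube = a - (uint32_t)~imm - (1u - carry);   // SUBE a, ~imm, C
    assert(adde == sube);
  }
}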
13180
13181 static SDValue PerformSELECTCombine(SDNode *N,
13182 TargetLowering::DAGCombinerInfo &DCI,
13183 const ARMSubtarget *Subtarget) {
13184 if (!Subtarget->hasMVEIntegerOps())
13185 return SDValue();
13186
13187 SDLoc dl(N);
13188 SDValue SetCC;
13189 SDValue LHS;
13190 SDValue RHS;
13191 ISD::CondCode CC;
13192 SDValue TrueVal;
13193 SDValue FalseVal;
13194
13195 if (N->getOpcode() == ISD::SELECT &&
13196 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13197 SetCC = N->getOperand(0);
13198 LHS = SetCC->getOperand(0);
13199 RHS = SetCC->getOperand(1);
13200 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13201 TrueVal = N->getOperand(1);
13202 FalseVal = N->getOperand(2);
13203 } else if (N->getOpcode() == ISD::SELECT_CC) {
13204 LHS = N->getOperand(0);
13205 RHS = N->getOperand(1);
13206 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13207 TrueVal = N->getOperand(2);
13208 FalseVal = N->getOperand(3);
13209 } else {
13210 return SDValue();
13211 }
13212
13213 unsigned int Opcode = 0;
13214 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13215 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13216 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13217 Opcode = ARMISD::VMINVu;
13218 if (CC == ISD::SETUGT)
13219 std::swap(TrueVal, FalseVal);
13220 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13221 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13222 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13223 Opcode = ARMISD::VMINVs;
13224 if (CC == ISD::SETGT)
13225 std::swap(TrueVal, FalseVal);
13226 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13227 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13228 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13229 Opcode = ARMISD::VMAXVu;
13230 if (CC == ISD::SETULT)
13231 std::swap(TrueVal, FalseVal);
13232 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13233 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13234 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13235 Opcode = ARMISD::VMAXVs;
13236 if (CC == ISD::SETLT)
13237 std::swap(TrueVal, FalseVal);
13238 } else
13239 return SDValue();
13240
13241 // Normalise to the right hand side being the vector reduction
13242 switch (TrueVal->getOpcode()) {
13243 case ISD::VECREDUCE_UMIN:
13244 case ISD::VECREDUCE_SMIN:
13245 case ISD::VECREDUCE_UMAX:
13246 case ISD::VECREDUCE_SMAX:
13247 std::swap(LHS, RHS);
13248 std::swap(TrueVal, FalseVal);
13249 break;
13250 }
13251
13252 EVT VectorType = FalseVal->getOperand(0).getValueType();
13253
13254 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13255 VectorType != MVT::v4i32)
13256 return SDValue();
13257
13258 EVT VectorScalarType = VectorType.getVectorElementType();
13259
13260 // The values being selected must also be the ones being compared
13261 if (TrueVal != LHS || FalseVal != RHS)
13262 return SDValue();
13263
13264 EVT LeftType = LHS->getValueType(0);
13265 EVT RightType = RHS->getValueType(0);
13266
13267 // The types must match the reduced type too
13268 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13269 return SDValue();
13270
13271 // Legalise the scalar to an i32
13272 if (VectorScalarType != MVT::i32)
13273 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13274
13275 // Generate the reduction as an i32 for legalisation purposes
13276 auto Reduction =
13277 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13278
13279 // The result isn't actually an i32 so truncate it back to its original type
13280 if (VectorScalarType != MVT::i32)
13281 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13282
13283 return Reduction;
13284}
13285
13286 // A special combine for the vqdmulh family of instructions. This is one of
13287 // the potential patterns that could match this instruction. The base pattern
13288 // you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13289 // This matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13290 // which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13291 // the max is unnecessary.
13292 static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13293 EVT VT = N->getValueType(0);
13294 SDValue Shft;
13295 ConstantSDNode *Clamp;
13296
13297 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13298 return SDValue();
13299
13300 if (N->getOpcode() == ISD::SMIN) {
13301 Shft = N->getOperand(0);
13302 Clamp = isConstOrConstSplat(N->getOperand(1));
13303 } else if (N->getOpcode() == ISD::VSELECT) {
13304 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13305 SDValue Cmp = N->getOperand(0);
13306 if (Cmp.getOpcode() != ISD::SETCC ||
13307 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13308 Cmp.getOperand(0) != N->getOperand(1) ||
13309 Cmp.getOperand(1) != N->getOperand(2))
13310 return SDValue();
13311 Shft = N->getOperand(1);
13312 Clamp = isConstOrConstSplat(N->getOperand(2));
13313 } else
13314 return SDValue();
13315
13316 if (!Clamp)
13317 return SDValue();
13318
13319 MVT ScalarType;
13320 int ShftAmt = 0;
13321 switch (Clamp->getSExtValue()) {
13322 case (1 << 7) - 1:
13323 ScalarType = MVT::i8;
13324 ShftAmt = 7;
13325 break;
13326 case (1 << 15) - 1:
13327 ScalarType = MVT::i16;
13328 ShftAmt = 15;
13329 break;
13330 case (1ULL << 31) - 1:
13331 ScalarType = MVT::i32;
13332 ShftAmt = 31;
13333 break;
13334 default:
13335 return SDValue();
13336 }
13337
13338 if (Shft.getOpcode() != ISD::SRA)
13339 return SDValue();
13340 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13341 if (!N1 || N1->getSExtValue() != ShftAmt)
13342 return SDValue();
13343
13344 SDValue Mul = Shft.getOperand(0);
13345 if (Mul.getOpcode() != ISD::MUL)
13346 return SDValue();
13347
13348 SDValue Ext0 = Mul.getOperand(0);
13349 SDValue Ext1 = Mul.getOperand(1);
13350 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13351 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13352 return SDValue();
13353 EVT VecVT = Ext0.getOperand(0).getValueType();
13354 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13355 return SDValue();
13356 if (Ext1.getOperand(0).getValueType() != VecVT ||
13357 VecVT.getScalarType() != ScalarType ||
13358 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13359 return SDValue();
13360
13361 SDLoc DL(Mul);
13362 unsigned LegalLanes = 128 / (ShftAmt + 1);
13363 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13364 // For types smaller than legal vectors extend to be legal and only use needed
13365 // lanes.
13366 if (VecVT.getSizeInBits() < 128) {
13367 EVT ExtVecVT =
13368 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13369 VecVT.getVectorNumElements());
13370 SDValue Inp0 =
13371 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13372 SDValue Inp1 =
13373 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13374 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13375 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13376 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13377 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13378 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13379 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13380 }
13381
13382 // For larger types, split into legal sized chunks.
13383 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13384 unsigned NumParts = VecVT.getSizeInBits() / 128;
13385 SmallVector<SDValue> Parts;
13386 for (unsigned I = 0; I < NumParts; ++I) {
13387 SDValue Inp0 =
13388 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13389 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13390 SDValue Inp1 =
13391 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13392 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13393 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13394 Parts.push_back(VQDMULH);
13395 }
13396 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13397 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13398}
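// Editor's aside (not part of ARMISelLowering.cpp): a scalar sketch, for
// 16-bit lanes, of why the matched min(ashr(mul(sext(x), sext(y)), 15), 32767)
// pattern has VQDMULH semantics: clamping (x*y) >> 15 to 32767 equals the
// saturating "doubling multiply returning high half", sat((2*x*y) >> 16).
// The names below are invented for this illustration.
#include <algorithm>
#include <cassert>
#include <cstdint>

int16_t vqdmulh16_reference(int16_t x, int16_t y) {
  int64_t doubled = 2 * (int64_t)x * y;
  // Saturation only triggers for x == y == -32768.
  return (int16_t)std::min<int64_t>(doubled >> 16, 32767);
}

int16_t matched_pattern16(int16_t x, int16_t y) {
  int64_t prod = (int64_t)x * y;                 // mul(sext(x), sext(y))
  return (int16_t)std::min<int64_t>(prod >> 15, 32767);
}

void vqdmulh_demo() {
  assert(vqdmulh16_reference(12000, -2500) == matched_pattern16(12000, -2500));
  assert(vqdmulh16_reference(-32768, -32768) == matched_pattern16(-32768, -32768));
}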
13399
13400 static SDValue PerformVSELECTCombine(SDNode *N,
13401 TargetLowering::DAGCombinerInfo &DCI,
13402 const ARMSubtarget *Subtarget) {
13403 if (!Subtarget->hasMVEIntegerOps())
13404 return SDValue();
13405
13406 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13407 return V;
13408
13409 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13410 //
13411 // We need to re-implement this optimization here as the implementation in the
13412 // Target-Independent DAGCombiner does not handle the kind of constant we make
13413 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13414 // good reason, allowing truncation there would break other targets).
13415 //
13416 // Currently, this is only done for MVE, as it's the only target that benefits
13417 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13418 if (N->getOperand(0).getOpcode() != ISD::XOR)
13419 return SDValue();
13420 SDValue XOR = N->getOperand(0);
13421
13422 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13423 // It is important to check with truncation allowed as the BUILD_VECTORs we
13424 // generate in those situations will truncate their operands.
13425 ConstantSDNode *Const =
13426 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13427 /*AllowTruncation*/ true);
13428 if (!Const || !Const->isOne())
13429 return SDValue();
13430
13431 // Rewrite into vselect(cond, rhs, lhs).
13432 SDValue Cond = XOR->getOperand(0);
13433 SDValue LHS = N->getOperand(1);
13434 SDValue RHS = N->getOperand(2);
13435 EVT Type = N->getValueType(0);
13436 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13437}
13438
13439// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13440 static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13441 TargetLowering::DAGCombinerInfo &DCI,
13442 const ARMSubtarget *Subtarget) {
13443 SDValue Op0 = N->getOperand(0);
13444 SDValue Op1 = N->getOperand(1);
13445 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13446 EVT VT = N->getValueType(0);
13447
13448 if (!Subtarget->hasMVEIntegerOps() ||
13449 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13450 return SDValue();
13451
13452 if (CC == ISD::SETUGE) {
13453 std::swap(Op0, Op1);
13454 CC = ISD::SETULT;
13455 }
13456
13457 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13458 Op0.getOpcode() != ISD::BUILD_VECTOR)
13459 return SDValue();
13460
13461 // Check first operand is BuildVector of 0,1,2,...
13462 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13463 if (!Op0.getOperand(I).isUndef() &&
13464 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13465 Op0.getConstantOperandVal(I) == I))
13466 return SDValue();
13467 }
13468
13469 // The second is a Splat of Op1S
13470 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13471 if (!Op1S)
13472 return SDValue();
13473
13474 unsigned Opc;
13475 switch (VT.getVectorNumElements()) {
13476 case 2:
13477 Opc = Intrinsic::arm_mve_vctp64;
13478 break;
13479 case 4:
13480 Opc = Intrinsic::arm_mve_vctp32;
13481 break;
13482 case 8:
13483 Opc = Intrinsic::arm_mve_vctp16;
13484 break;
13485 case 16:
13486 Opc = Intrinsic::arm_mve_vctp8;
13487 break;
13488 default:
13489 return SDValue();
13490 }
13491
13492 SDLoc DL(N);
13493 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13494 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13495 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13496}
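// Editor's aside (not part of ARMISelLowering.cpp): a sketch of why the
// matched setcc(buildvector(0,1,2,...), splat(n), ult) mask is exactly what
// VCTP produces — lane i is active iff i < n, i.e. the first min(n, lanes)
// lanes are set. The function name is invented.
#include <cassert>

void vctp_demo() {
  const unsigned Lanes = 8;                // e.g. an 8-lane predicate -> vctp16
  for (unsigned n = 0; n <= 2 * Lanes; ++n)
    for (unsigned i = 0; i < Lanes; ++i) {
      bool setccLane = (i < n);                           // icmp ult i, n
      bool vctpLane = (i < (n < Lanes ? n : Lanes));      // first min(n, Lanes) lanes
      assert(setccLane == vctpLane);
    }
}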
13497
13498 static SDValue PerformABSCombine(SDNode *N,
13499 TargetLowering::DAGCombinerInfo &DCI,
13500 const ARMSubtarget *Subtarget) {
13501 SelectionDAG &DAG = DCI.DAG;
13502 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13503
13504 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13505 return SDValue();
13506
13507 return TLI.expandABS(N, DAG);
13508}
13509
13510/// PerformADDECombine - Target-specific dag combine transform from
13511/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13512/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13513 static SDValue PerformADDECombine(SDNode *N,
13514 TargetLowering::DAGCombinerInfo &DCI,
13515 const ARMSubtarget *Subtarget) {
13516 // Only ARM and Thumb2 support UMLAL/SMLAL.
13517 if (Subtarget->isThumb1Only())
13518 return PerformAddeSubeCombine(N, DCI, Subtarget);
13519
13520 // Only perform the checks after legalize when the pattern is available.
13521 if (DCI.isBeforeLegalize()) return SDValue();
13522
13523 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13524}
13525
13526/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13527/// operands N0 and N1. This is a helper for PerformADDCombine that is
13528/// called with the default operands, and if that fails, with commuted
13529/// operands.
13530 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13531 TargetLowering::DAGCombinerInfo &DCI,
13532 const ARMSubtarget *Subtarget) {
13533 // Attempt to create vpadd for this add.
13534 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13535 return Result;
13536
13537 // Attempt to create vpaddl for this add.
13538 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13539 return Result;
13540 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13541 Subtarget))
13542 return Result;
13543
13544 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13545 if (N0.getNode()->hasOneUse())
13546 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13547 return Result;
13548 return SDValue();
13549}
13550
13551 static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13552 EVT VT = N->getValueType(0);
13553 SDValue N0 = N->getOperand(0);
13554 SDValue N1 = N->getOperand(1);
13555 SDLoc dl(N);
13556
13557 auto IsVecReduce = [](SDValue Op) {
13558 switch (Op.getOpcode()) {
13559 case ISD::VECREDUCE_ADD:
13560 case ARMISD::VADDVs:
13561 case ARMISD::VADDVu:
13562 case ARMISD::VMLAVs:
13563 case ARMISD::VMLAVu:
13564 return true;
13565 }
13566 return false;
13567 };
13568
13569 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13570 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13571 // add(add(X, vecreduce(Y)), vecreduce(Z))
13572 // to make better use of vaddva style instructions.
13573 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13574 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13575 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13576 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13577 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13578 }
13579 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13580 // add(add(add(A, C), reduce(B)), reduce(D))
13581 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13582 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13583 unsigned N0RedOp = 0;
13584 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13585 N0RedOp = 1;
13586 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13587 return SDValue();
13588 }
13589
13590 unsigned N1RedOp = 0;
13591 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13592 N1RedOp = 1;
13593 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13594 return SDValue();
13595
13596 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13597 N1.getOperand(1 - N1RedOp));
13598 SDValue Add1 =
13599 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13600 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13601 }
13602 return SDValue();
13603 };
13604 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13605 return R;
13606 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13607 return R;
13608
13609 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13610 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13611 // by ascending load offsets. This can help cores prefetch if the order of
13612 // loads is more predictable.
13613 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13614 // Check if two reductions are known to load data where one is before/after
13615 // another. Return negative if N0 loads data before N1, positive if N1 is
13616 // before N0 and 0 otherwise if nothing is known.
13617 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13618 // Look through to the first operand of a MUL, for the VMLA case.
13619 // Currently only looks at the first operand, in the hope they are equal.
13620 if (N0.getOpcode() == ISD::MUL)
13621 N0 = N0.getOperand(0);
13622 if (N1.getOpcode() == ISD::MUL)
13623 N1 = N1.getOperand(0);
13624
13625 // Return true if the two operands are loads to the same object and the
13626 // offset of the first is known to be less than the offset of the second.
13627 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13628 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13629 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13630 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13631 Load1->isIndexed())
13632 return 0;
13633
13634 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13635 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13636
13637 if (!BaseLocDecomp0.getBase() ||
13638 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13639 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13640 return 0;
13641 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13642 return -1;
13643 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13644 return 1;
13645 return 0;
13646 };
13647
13648 SDValue X;
13649 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13650 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13651 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13652 N0.getOperand(1).getOperand(0));
13653 if (IsBefore < 0) {
13654 X = N0.getOperand(0);
13655 N0 = N0.getOperand(1);
13656 } else if (IsBefore > 0) {
13657 X = N0.getOperand(1);
13658 N0 = N0.getOperand(0);
13659 } else
13660 return SDValue();
13661 } else if (IsVecReduce(N0.getOperand(0))) {
13662 X = N0.getOperand(1);
13663 N0 = N0.getOperand(0);
13664 } else if (IsVecReduce(N0.getOperand(1))) {
13665 X = N0.getOperand(0);
13666 N0 = N0.getOperand(1);
13667 } else
13668 return SDValue();
13669 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13670 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13671 // Note this is backward to how you would expect. We create
13672 // add(reduce(load + 16), reduce(load + 0)) so that the
13673 // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving
13674 // the X as VADDV(load + 0)
13675 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13676 } else
13677 return SDValue();
13678
13679 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13680 return SDValue();
13681
13682 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13683 return SDValue();
13684
13685 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13686 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13687 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13688 };
13689 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13690 return R;
13691 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13692 return R;
13693 return SDValue();
13694}
13695
13696 static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13697 const ARMSubtarget *Subtarget) {
13698 if (!Subtarget->hasMVEIntegerOps())
13699 return SDValue();
13700
13701 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13702 return R;
13703
13704 EVT VT = N->getValueType(0);
13705 SDValue N0 = N->getOperand(0);
13706 SDValue N1 = N->getOperand(1);
13707 SDLoc dl(N);
13708
13709 if (VT != MVT::i64)
13710 return SDValue();
13711
13712 // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13713 // will look like:
13714 // t1: i32,i32 = ARMISD::VADDLVs x
13715 // t2: i64 = build_pair t1, t1:1
13716 // t3: i64 = add t2, y
13717 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13718 // the add to be simplified separately.
13719 // We also need to check for sext / zext and commutative adds.
13720 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13721 SDValue NB) {
13722 if (NB->getOpcode() != ISD::BUILD_PAIR)
13723 return SDValue();
13724 SDValue VecRed = NB->getOperand(0);
13725 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13726 VecRed.getResNo() != 0 ||
13727 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13728 return SDValue();
13729
13730 if (VecRed->getOpcode() == OpcodeA) {
13731 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13732 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13733 VecRed.getOperand(0), VecRed.getOperand(1));
13734 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13735 }
13736
13737 SmallVector<SDValue, 4> Ops(2);
13738 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13739
13740 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13741 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13742 Ops.push_back(VecRed->getOperand(I));
13743 SDValue Red =
13744 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13745 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13746 SDValue(Red.getNode(), 1));
13747 };
13748
13749 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13750 return M;
13751 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13752 return M;
13753 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13754 return M;
13755 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13756 return M;
13757 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13758 return M;
13759 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13760 return M;
13761 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13762 return M;
13763 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13764 return M;
13765 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13766 return M;
13767 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13768 return M;
13769 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13770 return M;
13771 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13772 return M;
13773 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13774 return M;
13775 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13776 return M;
13777 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13778 return M;
13779 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13780 return M;
13781 return SDValue();
13782}
13783
13784 bool
13785 ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13786 CombineLevel Level) const {
13787 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13788 N->getOpcode() == ISD::SRL) &&
13789 "Expected shift op");
13790
13791 if (Level == BeforeLegalizeTypes)
13792 return true;
13793
13794 if (N->getOpcode() != ISD::SHL)
13795 return true;
13796
13797 if (Subtarget->isThumb1Only()) {
13798 // Avoid making expensive immediates by commuting shifts. (This logic
13799 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13800 // for free.)
13801 if (N->getOpcode() != ISD::SHL)
13802 return true;
13803 SDValue N1 = N->getOperand(0);
13804 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13805 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13806 return true;
13807 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13808 if (Const->getAPIntValue().ult(256))
13809 return false;
13810 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13811 Const->getAPIntValue().sgt(-256))
13812 return false;
13813 }
13814 return true;
13815 }
13816
13817 // Turn off commute-with-shift transform after legalization, so it doesn't
13818 // conflict with PerformSHLSimplify. (We could try to detect when
13819 // PerformSHLSimplify would trigger more precisely, but it isn't
13820 // really necessary.)
13821 return false;
13822}
13823
13824 bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13825 const SDNode *N) const {
13826 assert(N->getOpcode() == ISD::XOR &&
13827 (N->getOperand(0).getOpcode() == ISD::SHL ||
13828 N->getOperand(0).getOpcode() == ISD::SRL) &&
13829 "Expected XOR(SHIFT) pattern");
13830
13831 // Only commute if the entire NOT mask is a hidden shifted mask.
13832 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13833 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13834 if (XorC && ShiftC) {
13835 unsigned MaskIdx, MaskLen;
13836 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13837 unsigned ShiftAmt = ShiftC->getZExtValue();
13838 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13839 if (N->getOperand(0).getOpcode() == ISD::SHL)
13840 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13841 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13842 }
13843 }
13844
13845 return false;
13846}
13847
13848 bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13849 const SDNode *N, CombineLevel Level) const {
13850 assert(((N->getOpcode() == ISD::SHL &&
13851 N->getOperand(0).getOpcode() == ISD::SRL) ||
13852 (N->getOpcode() == ISD::SRL &&
13853 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13854 "Expected shift-shift mask");
13855
13856 if (!Subtarget->isThumb1Only())
13857 return true;
13858
13859 if (Level == BeforeLegalizeTypes)
13860 return true;
13861
13862 return false;
13863}
13864
13865 bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13866 EVT VT) const {
13867 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13868}
13869
13870 bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13871 if (!Subtarget->hasNEON()) {
13872 if (Subtarget->isThumb1Only())
13873 return VT.getScalarSizeInBits() <= 32;
13874 return true;
13875 }
13876 return VT.isScalarInteger();
13877}
13878
13879 bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13880 EVT VT) const {
13881 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13882 return false;
13883
13884 switch (FPVT.getSimpleVT().SimpleTy) {
13885 case MVT::f16:
13886 return Subtarget->hasVFP2Base();
13887 case MVT::f32:
13888 return Subtarget->hasVFP2Base();
13889 case MVT::f64:
13890 return Subtarget->hasFP64();
13891 case MVT::v4f32:
13892 case MVT::v8f16:
13893 return Subtarget->hasMVEFloatOps();
13894 default:
13895 return false;
13896 }
13897}
13898
13899 static SDValue PerformSHLSimplify(SDNode *N,
13900 TargetLowering::DAGCombinerInfo &DCI,
13901 const ARMSubtarget *ST) {
13902 // Allow the generic combiner to identify potential bswaps.
13903 if (DCI.isBeforeLegalize())
13904 return SDValue();
13905
13906 // DAG combiner will fold:
13907 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13908 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2
13909 // Other code patterns that can also be modified have the following form:
13910 // b + ((a << 1) | 510)
13911 // b + ((a << 1) & 510)
13912 // b + ((a << 1) ^ 510)
13913 // b + ((a << 1) + 510)
13914
13915 // Many instructions can perform the shift for free, but it requires both
13916 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13917 // instruction will be needed. So, unfold back to the original pattern if:
13918 // - c1 and c2 are small enough that they don't require mov imms.
13919 // - the user(s) of the node can perform an shl
13920
13921 // No shifted operands for 16-bit instructions.
13922 if (ST->isThumb() && ST->isThumb1Only())
13923 return SDValue();
13924
13925 // Check that all the users could perform the shl themselves.
13926 for (auto *U : N->uses()) {
13927 switch(U->getOpcode()) {
13928 default:
13929 return SDValue();
13930 case ISD::SUB:
13931 case ISD::ADD:
13932 case ISD::AND:
13933 case ISD::OR:
13934 case ISD::XOR:
13935 case ISD::SETCC:
13936 case ARMISD::CMP:
13937 // Check that the user isn't already using a constant because there
13938 // aren't any instructions that support an immediate operand and a
13939 // shifted operand.
13940 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13941 isa<ConstantSDNode>(U->getOperand(1)))
13942 return SDValue();
13943
13944 // Check that it's not already using a shift.
13945 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13946 U->getOperand(1).getOpcode() == ISD::SHL)
13947 return SDValue();
13948 break;
13949 }
13950 }
13951
13952 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13953 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13954 return SDValue();
13955
13956 if (N->getOperand(0).getOpcode() != ISD::SHL)
13957 return SDValue();
13958
13959 SDValue SHL = N->getOperand(0);
13960
13961 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13962 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13963 if (!C1ShlC2 || !C2)
13964 return SDValue();
13965
13966 APInt C2Int = C2->getAPIntValue();
13967 APInt C1Int = C1ShlC2->getAPIntValue();
13968 unsigned C2Width = C2Int.getBitWidth();
13969 if (C2Int.uge(C2Width))
13970 return SDValue();
13971 uint64_t C2Value = C2Int.getZExtValue();
13972
13973 // Check that performing a lshr will not lose any information.
13974 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13975 if ((C1Int & Mask) != C1Int)
13976 return SDValue();
13977
13978 // Shift the first constant.
13979 C1Int.lshrInPlace(C2Int);
13980
13981 // The immediates are encoded as an 8-bit value that can be rotated.
13982 auto LargeImm = [](const APInt &Imm) {
13983 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13984 return Imm.getBitWidth() - Zeros > 8;
13985 };
13986
13987 if (LargeImm(C1Int) || LargeImm(C2Int))
13988 return SDValue();
13989
13990 SelectionDAG &DAG = DCI.DAG;
13991 SDLoc dl(N);
13992 SDValue X = SHL.getOperand(0);
13993 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13994 DAG.getConstant(C1Int, dl, MVT::i32));
13995 // Shift left to compensate for the lshr of C1Int.
13996 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
13997
13998 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
13999 SHL.dump(); N->dump());
14000 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14001 return Res;
14002}
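// Editor's aside (not part of ARMISelLowering.cpp): a sketch of the
// re-association PerformSHLSimplify performs. For 32-bit wrap-around
// arithmetic the shift distributes over the operation, so a large constant
// such as 510 can be replaced by a small one (255) plus a shift that the
// using instruction gets for free. The function name is invented.
#include <cassert>
#include <cstdint>

void shl_unfold_demo() {
  uint32_t x = 0x12345678u;
  // (x << 1) + 510  ==  (x + 255) << 1
  assert(((x << 1) + 510u) == ((x + 255u) << 1));
  // (x << 1) | 510  ==  (x | 255) << 1   (the same holds for & and ^)
  assert(((x << 1) | 510u) == ((x | 255u) << 1));
}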
14003
14004
14005/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14006///
14007 static SDValue PerformADDCombine(SDNode *N,
14008 TargetLowering::DAGCombinerInfo &DCI,
14009 const ARMSubtarget *Subtarget) {
14010 SDValue N0 = N->getOperand(0);
14011 SDValue N1 = N->getOperand(1);
14012
14013 // Only works one way, because it needs an immediate operand.
14014 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14015 return Result;
14016
14017 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14018 return Result;
14019
14020 // First try with the default operand order.
14021 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14022 return Result;
14023
14024 // If that didn't work, try again with the operands commuted.
14025 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14026}
14027
14028// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14029// providing -X is as cheap as X (currently, just a constant).
14030 static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14031 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14032 return SDValue();
14033 SDValue CSINC = N->getOperand(1);
14034 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14035 return SDValue();
14036
14037 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14038 if (!X)
14039 return SDValue();
14040
14041 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14042 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14043 CSINC.getOperand(0)),
14044 CSINC.getOperand(1), CSINC.getOperand(2),
14045 CSINC.getOperand(3));
14046}
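// Editor's aside (not part of ARMISelLowering.cpp): the two's-complement
// identity behind the CSINC -> CSINV rewrite above. Negating "y + 1" equals
// the bitwise NOT of y, so (sub 0, (csinc X, Y, CC)) can become a select
// between -X (cheap when X is a constant) and ~Y. The function name is
// invented.
#include <cassert>
#include <cstdint>

void csinc_to_csinv_demo() {
  uint32_t y = 0xcafef00du;
  assert(0u - (y + 1u) == ~y);
}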
14047
14048/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14049///
14050 static SDValue PerformSUBCombine(SDNode *N,
14051 TargetLowering::DAGCombinerInfo &DCI,
14052 const ARMSubtarget *Subtarget) {
14053 SDValue N0 = N->getOperand(0);
14054 SDValue N1 = N->getOperand(1);
14055
14056 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14057 if (N1.getNode()->hasOneUse())
14058 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14059 return Result;
14060
14061 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14062 return R;
14063
14064 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14065 return SDValue();
14066
14067 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14068 // so that we can readily pattern match more mve instructions which can use
14069 // a scalar operand.
14070 SDValue VDup = N->getOperand(1);
14071 if (VDup->getOpcode() != ARMISD::VDUP)
14072 return SDValue();
14073
14074 SDValue VMov = N->getOperand(0);
14075 if (VMov->getOpcode() == ISD::BITCAST)
14076 VMov = VMov->getOperand(0);
14077
14078 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14079 return SDValue();
14080
14081 SDLoc dl(N);
14082 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14083 DCI.DAG.getConstant(0, dl, MVT::i32),
14084 VDup->getOperand(0));
14085 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14086}
14087
14088/// PerformVMULCombine
14089/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14090/// special multiplier accumulator forwarding.
14091/// vmul d3, d0, d2
14092/// vmla d3, d1, d2
14093/// is faster than
14094/// vadd d3, d0, d1
14095/// vmul d3, d3, d2
14096// However, for (A + B) * (A + B),
14097// vadd d2, d0, d1
14098// vmul d3, d0, d2
14099// vmla d3, d1, d2
14100// is slower than
14101// vadd d2, d0, d1
14102// vmul d3, d2, d2
14103 static SDValue PerformVMULCombine(SDNode *N,
14104 TargetLowering::DAGCombinerInfo &DCI,
14105 const ARMSubtarget *Subtarget) {
14106 if (!Subtarget->hasVMLxForwarding())
14107 return SDValue();
14108
14109 SelectionDAG &DAG = DCI.DAG;
14110 SDValue N0 = N->getOperand(0);
14111 SDValue N1 = N->getOperand(1);
14112 unsigned Opcode = N0.getOpcode();
14113 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14114 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14115 Opcode = N1.getOpcode();
14116 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14117 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14118 return SDValue();
14119 std::swap(N0, N1);
14120 }
14121
14122 if (N0 == N1)
14123 return SDValue();
14124
14125 EVT VT = N->getValueType(0);
14126 SDLoc DL(N);
14127 SDValue N00 = N0->getOperand(0);
14128 SDValue N01 = N0->getOperand(1);
14129 return DAG.getNode(Opcode, DL, VT,
14130 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14131 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14132}
14133
14134 static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14135 const ARMSubtarget *Subtarget) {
14136 EVT VT = N->getValueType(0);
14137 if (VT != MVT::v2i64)
14138 return SDValue();
14139
14140 SDValue N0 = N->getOperand(0);
14141 SDValue N1 = N->getOperand(1);
14142
14143 auto IsSignExt = [&](SDValue Op) {
14144 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14145 return SDValue();
14146 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14147 if (VT.getScalarSizeInBits() == 32)
14148 return Op->getOperand(0);
14149 return SDValue();
14150 };
14151 auto IsZeroExt = [&](SDValue Op) {
14152 // Zero extends are a little more awkward. At the point we are matching
14153 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14154 // That might be before or after a bitcast depending on how the and is
14155 // placed. Because this has to look through bitcasts, it is currently only
14156 // supported on LE.
14157 if (!Subtarget->isLittle())
14158 return SDValue();
14159
14160 SDValue And = Op;
14161 if (And->getOpcode() == ISD::BITCAST)
14162 And = And->getOperand(0);
14163 if (And->getOpcode() != ISD::AND)
14164 return SDValue();
14165 SDValue Mask = And->getOperand(1);
14166 if (Mask->getOpcode() == ISD::BITCAST)
14167 Mask = Mask->getOperand(0);
14168
14169 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14170 Mask.getValueType() != MVT::v4i32)
14171 return SDValue();
14172 if (isAllOnesConstant(Mask->getOperand(0)) &&
14173 isNullConstant(Mask->getOperand(1)) &&
14174 isAllOnesConstant(Mask->getOperand(2)) &&
14175 isNullConstant(Mask->getOperand(3)))
14176 return And->getOperand(0);
14177 return SDValue();
14178 };
14179
14180 SDLoc dl(N);
14181 if (SDValue Op0 = IsSignExt(N0)) {
14182 if (SDValue Op1 = IsSignExt(N1)) {
14183 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14184 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14185 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14186 }
14187 }
14188 if (SDValue Op0 = IsZeroExt(N0)) {
14189 if (SDValue Op1 = IsZeroExt(N1)) {
14190 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14191 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14192 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14193 }
14194 }
14195
14196 return SDValue();
14197}
14198
14199 static SDValue PerformMULCombine(SDNode *N,
14200 TargetLowering::DAGCombinerInfo &DCI,
14201 const ARMSubtarget *Subtarget) {
14202 SelectionDAG &DAG = DCI.DAG;
14203
14204 EVT VT = N->getValueType(0);
14205 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14206 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14207
14208 if (Subtarget->isThumb1Only())
14209 return SDValue();
14210
14211 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14212 return SDValue();
14213
14214 if (VT.is64BitVector() || VT.is128BitVector())
14215 return PerformVMULCombine(N, DCI, Subtarget);
14216 if (VT != MVT::i32)
14217 return SDValue();
14218
14219 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14220 if (!C)
14221 return SDValue();
14222
14223 int64_t MulAmt = C->getSExtValue();
14224 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14225
14226 ShiftAmt = ShiftAmt & (32 - 1);
14227 SDValue V = N->getOperand(0);
14228 SDLoc DL(N);
14229
14230 SDValue Res;
14231 MulAmt >>= ShiftAmt;
14232
14233 if (MulAmt >= 0) {
14234 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14235 // (mul x, 2^N + 1) => (add (shl x, N), x)
14236 Res = DAG.getNode(ISD::ADD, DL, VT,
14237 V,
14238 DAG.getNode(ISD::SHL, DL, VT,
14239 V,
14240 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14241 MVT::i32)));
14242 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14243 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14244 Res = DAG.getNode(ISD::SUB, DL, VT,
14245 DAG.getNode(ISD::SHL, DL, VT,
14246 V,
14247 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14248 MVT::i32)),
14249 V);
14250 } else
14251 return SDValue();
14252 } else {
14253 uint64_t MulAmtAbs = -MulAmt;
14254 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14255 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14256 Res = DAG.getNode(ISD::SUB, DL, VT,
14257 V,
14258 DAG.getNode(ISD::SHL, DL, VT,
14259 V,
14260 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14261 MVT::i32)));
14262 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14263 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14264 Res = DAG.getNode(ISD::ADD, DL, VT,
14265 V,
14266 DAG.getNode(ISD::SHL, DL, VT,
14267 V,
14268 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14269 MVT::i32)));
14270 Res = DAG.getNode(ISD::SUB, DL, VT,
14271 DAG.getConstant(0, DL, MVT::i32), Res);
14272 } else
14273 return SDValue();
14274 }
14275
14276 if (ShiftAmt != 0)
14277 Res = DAG.getNode(ISD::SHL, DL, VT,
14278 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14279
14280 // Do not add new nodes to DAG combiner worklist.
14281 DCI.CombineTo(N, Res, false);
14282 return SDValue();
14283}
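// Editor's aside (not part of ARMISelLowering.cpp): the shift/add identities
// used by PerformMULCombine, spelled out for 32-bit wrap-around arithmetic.
// The function name is invented.
#include <cassert>
#include <cstdint>

void mul_to_shift_demo() {
  uint32_t x = 0x01234567u;
  assert(x * 9u == (x << 3) + x);                    // 2^3 + 1
  assert(x * 7u == (x << 3) - x);                    // 2^3 - 1
  assert(x * (uint32_t)-7 == x - (x << 3));          // -(2^3 - 1)
  assert(x * (uint32_t)-9 == 0u - ((x << 3) + x));   // -(2^3 + 1)
}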
14284
14285 static SDValue CombineANDShift(SDNode *N,
14286 TargetLowering::DAGCombinerInfo &DCI,
14287 const ARMSubtarget *Subtarget) {
14288 // Allow DAGCombine to pattern-match before we touch the canonical form.
14289 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14290 return SDValue();
14291
14292 if (N->getValueType(0) != MVT::i32)
14293 return SDValue();
14294
14295 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14296 if (!N1C)
14297 return SDValue();
14298
14299 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14300 // Don't transform uxtb/uxth.
14301 if (C1 == 255 || C1 == 65535)
14302 return SDValue();
14303
14304 SDNode *N0 = N->getOperand(0).getNode();
14305 if (!N0->hasOneUse())
14306 return SDValue();
14307
14308 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14309 return SDValue();
14310
14311 bool LeftShift = N0->getOpcode() == ISD::SHL;
14312
14313 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14314 if (!N01C)
14315 return SDValue();
14316
14317 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14318 if (!C2 || C2 >= 32)
14319 return SDValue();
14320
14321 // Clear irrelevant bits in the mask.
14322 if (LeftShift)
14323 C1 &= (-1U << C2);
14324 else
14325 C1 &= (-1U >> C2);
14326
14327 SelectionDAG &DAG = DCI.DAG;
14328 SDLoc DL(N);
14329
14330 // We have a pattern of the form "(and (shl x, c2) c1)" or
14331 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14332 // transform to a pair of shifts, to save materializing c1.
14333
14334 // First pattern: right shift, then mask off leading bits.
14335 // FIXME: Use demanded bits?
14336 if (!LeftShift && isMask_32(C1)) {
14337 uint32_t C3 = llvm::countl_zero(C1);
14338 if (C2 < C3) {
14339 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14340 DAG.getConstant(C3 - C2, DL, MVT::i32));
14341 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14342 DAG.getConstant(C3, DL, MVT::i32));
14343 }
14344 }
14345
14346 // First pattern, reversed: left shift, then mask off trailing bits.
14347 if (LeftShift && isMask_32(~C1)) {
14348 uint32_t C3 = llvm::countr_zero(C1);
14349 if (C2 < C3) {
14350 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14351 DAG.getConstant(C3 - C2, DL, MVT::i32));
14352 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14353 DAG.getConstant(C3, DL, MVT::i32));
14354 }
14355 }
14356
14357 // Second pattern: left shift, then mask off leading bits.
14358 // FIXME: Use demanded bits?
14359 if (LeftShift && isShiftedMask_32(C1)) {
14360 uint32_t Trailing = llvm::countr_zero(C1);
14361 uint32_t C3 = llvm::countl_zero(C1);
14362 if (Trailing == C2 && C2 + C3 < 32) {
14363 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14364 DAG.getConstant(C2 + C3, DL, MVT::i32));
14365 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14366 DAG.getConstant(C3, DL, MVT::i32));
14367 }
14368 }
14369
14370 // Second pattern, reversed: right shift, then mask off trailing bits.
14371 // FIXME: Handle other patterns of known/demanded bits.
14372 if (!LeftShift && isShiftedMask_32(C1)) {
14373 uint32_t Leading = llvm::countl_zero(C1);
14374 uint32_t C3 = llvm::countr_zero(C1);
14375 if (Leading == C2 && C2 + C3 < 32) {
14376 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14377 DAG.getConstant(C2 + C3, DL, MVT::i32));
14378 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14379 DAG.getConstant(C3, DL, MVT::i32));
14380 }
14381 }
14382
14383 // FIXME: Transform "(and (shl x, c2) c1)" ->
14384 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
14385 // c1.
14386 return SDValue();
14387}
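// Editor's aside (not part of ARMISelLowering.cpp): concrete instances of the
// shift-pair rewrites above, which avoid materializing the mask constant.
// The function name is invented.
#include <cassert>
#include <cstdint>

void and_shift_pair_demo() {
  uint32_t x = 0xdeadbeefu;
  // First pattern:  (x >> 4) & 0xff   ==  (x << 20) >> 24   (C2=4, C1=0xff, C3=24)
  assert(((x >> 4) & 0xffu) == ((x << 20) >> 24));
  // Second pattern: (x << 4) & 0xff0  ==  (x << 24) >> 20   (C2=4, C1=0xff0, C3=20)
  assert(((x << 4) & 0xff0u) == ((x << 24) >> 20));
}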
14388
14389 static SDValue PerformANDCombine(SDNode *N,
14390 TargetLowering::DAGCombinerInfo &DCI,
14391 const ARMSubtarget *Subtarget) {
14392 // Attempt to use immediate-form VBIC
14393 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14394 SDLoc dl(N);
14395 EVT VT = N->getValueType(0);
14396 SelectionDAG &DAG = DCI.DAG;
14397
14398 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14399 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14400 return SDValue();
14401
14402 APInt SplatBits, SplatUndef;
14403 unsigned SplatBitSize;
14404 bool HasAnyUndefs;
14405 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14406 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14407 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14408 SplatBitSize == 64) {
14409 EVT VbicVT;
14410 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14411 SplatUndef.getZExtValue(), SplatBitSize,
14412 DAG, dl, VbicVT, VT, OtherModImm);
14413 if (Val.getNode()) {
14414 SDValue Input =
14415 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14416 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14417 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14418 }
14419 }
14420 }
14421
14422 if (!Subtarget->isThumb1Only()) {
14423 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14424 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14425 return Result;
14426
14427 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14428 return Result;
14429 }
14430
14431 if (Subtarget->isThumb1Only())
14432 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14433 return Result;
14434
14435 return SDValue();
14436}
14437
14438// Try combining OR nodes to SMULWB, SMULWT.
14439 static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14440 TargetLowering::DAGCombinerInfo &DCI,
14441 const ARMSubtarget *Subtarget) {
14442 if (!Subtarget->hasV6Ops() ||
14443 (Subtarget->isThumb() &&
14444 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14445 return SDValue();
14446
14447 SDValue SRL = OR->getOperand(0);
14448 SDValue SHL = OR->getOperand(1);
14449
14450 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14451 SRL = OR->getOperand(1);
14452 SHL = OR->getOperand(0);
14453 }
14454 if (!isSRL16(SRL) || !isSHL16(SHL))
14455 return SDValue();
14456
14457 // The first operands to the shifts need to be the two results from the
14458 // same smul_lohi node.
14459 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14460 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14461 return SDValue();
14462
14463 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14464 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14465 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14466 return SDValue();
14467
14468 // Now we have:
14469 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14470 // For SMULW[B|T], smul_lohi will take a 32-bit and a 16-bit argument.
14471 // For SMULWB the 16-bit value will be sign-extended somehow.
14472 // For SMULWT only the SRA is required.
14473 // Check both sides of SMUL_LOHI
14474 SDValue OpS16 = SMULLOHI->getOperand(0);
14475 SDValue OpS32 = SMULLOHI->getOperand(1);
14476
14477 SelectionDAG &DAG = DCI.DAG;
14478 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14479 OpS16 = OpS32;
14480 OpS32 = SMULLOHI->getOperand(0);
14481 }
14482
14483 SDLoc dl(OR);
14484 unsigned Opcode = 0;
14485 if (isS16(OpS16, DAG))
14486 Opcode = ARMISD::SMULWB;
14487 else if (isSRA16(OpS16)) {
14488 Opcode = ARMISD::SMULWT;
14489 OpS16 = OpS16->getOperand(0);
14490 }
14491 else
14492 return SDValue();
14493
14494 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14495 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14496 return SDValue(OR, 0);
14497}
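// Editor's aside (not part of ARMISelLowering.cpp): the bit-level identity the
// SMULW[B|T] combine recognizes — OR-ing (lo >> 16) with (hi << 16) of a
// 64-bit product simply extracts the middle 32 bits, i.e. (product >> 16),
// which is what SMULWB/SMULWT compute when one factor is a sign-extended
// 16-bit value. Names are invented for the illustration.
#include <cassert>
#include <cstdint>

void smulw_demo() {
  int32_t a = 123456789;
  int16_t s = -1234;                              // already a 16-bit value
  uint64_t P = (uint64_t)((int64_t)a * s);        // smul_lohi result, as raw bits
  uint32_t lo = (uint32_t)P, hi = (uint32_t)(P >> 32);
  assert(((lo >> 16) | (hi << 16)) == (uint32_t)(P >> 16));
}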
14498
14499 static SDValue PerformORCombineToBFI(SDNode *N,
14500 TargetLowering::DAGCombinerInfo &DCI,
14501 const ARMSubtarget *Subtarget) {
14502 // BFI is only available on V6T2+
14503 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14504 return SDValue();
14505
14506 EVT VT = N->getValueType(0);
14507 SDValue N0 = N->getOperand(0);
14508 SDValue N1 = N->getOperand(1);
14509 SelectionDAG &DAG = DCI.DAG;
14510 SDLoc DL(N);
14511 // 1) or (and A, mask), val => ARMbfi A, val, mask
14512 // iff (val & ~mask) == val
14513 //
14514 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14515 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14516 // && mask == ~mask2
14517 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14518 // && ~mask == mask2
14519 // (i.e., copy a bitfield value into another bitfield of the same width)
14520
14521 if (VT != MVT::i32)
14522 return SDValue();
14523
14524 SDValue N00 = N0.getOperand(0);
14525
14526 // The value and the mask need to be constants so we can verify this is
14527 // actually a bitfield set. If the mask is 0xffff, we can do better
14528 // via a movt instruction, so don't use BFI in that case.
14529 SDValue MaskOp = N0.getOperand(1);
14530 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14531 if (!MaskC)
14532 return SDValue();
14533 unsigned Mask = MaskC->getZExtValue();
14534 if (Mask == 0xffff)
14535 return SDValue();
14536 SDValue Res;
14537 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14538 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14539 if (N1C) {
14540 unsigned Val = N1C->getZExtValue();
14541 if ((Val & ~Mask) != Val)
14542 return SDValue();
14543
14544 if (ARM::isBitFieldInvertedMask(Mask)) {
14545 Val >>= llvm::countr_zero(~Mask);
14546
14547 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14548 DAG.getConstant(Val, DL, MVT::i32),
14549 DAG.getConstant(Mask, DL, MVT::i32));
14550
14551 DCI.CombineTo(N, Res, false);
14552 // Return value from the original node to inform the combiner that N is
14553 // now dead.
14554 return SDValue(N, 0);
14555 }
14556 } else if (N1.getOpcode() == ISD::AND) {
14557 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14558 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14559 if (!N11C)
14560 return SDValue();
14561 unsigned Mask2 = N11C->getZExtValue();
14562
14563 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14564 // as is to match.
14565 if (ARM::isBitFieldInvertedMask(Mask) &&
14566 (Mask == ~Mask2)) {
14567 // The pack halfword instruction works better for masks that fit it,
14568 // so use that when it's available.
14569 if (Subtarget->hasDSP() &&
14570 (Mask == 0xffff || Mask == 0xffff0000))
14571 return SDValue();
14572 // 2a
14573 unsigned amt = llvm::countr_zero(Mask2);
14574 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14575 DAG.getConstant(amt, DL, MVT::i32));
14576 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14577 DAG.getConstant(Mask, DL, MVT::i32));
14578 DCI.CombineTo(N, Res, false);
14579 // Return value from the original node to inform the combiner that N is
14580 // now dead.
14581 return SDValue(N, 0);
14582 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14583 (~Mask == Mask2)) {
14584 // The pack halfword instruction works better for masks that fit it,
14585 // so use that when it's available.
14586 if (Subtarget->hasDSP() &&
14587 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14588 return SDValue();
14589 // 2b
14590 unsigned lsb = llvm::countr_zero(Mask);
14591 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14592 DAG.getConstant(lsb, DL, MVT::i32));
14593 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14594 DAG.getConstant(Mask2, DL, MVT::i32));
14595 DCI.CombineTo(N, Res, false);
14596 // Return value from the original node to inform the combiner that N is
14597 // now dead.
14598 return SDValue(N, 0);
14599 }
14600 }
14601
14602 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14603 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14604 ARM::isBitFieldInvertedMask(~Mask)) {
14605 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14606 // where lsb(mask) == #shamt and masked bits of B are known zero.
14607 SDValue ShAmt = N00.getOperand(1);
14608 unsigned ShAmtC = ShAmt->getAsZExtVal();
14609 unsigned LSB = llvm::countr_zero(Mask);
14610 if (ShAmtC != LSB)
14611 return SDValue();
14612
14613 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14614 DAG.getConstant(~Mask, DL, MVT::i32));
14615
14616 DCI.CombineTo(N, Res, false);
14617 // Return value from the original node to inform the combiner that N is
14618 // now dead.
14619 return SDValue(N, 0);
14620 }
14621
14622 return SDValue();
14623}
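// Editor's aside (not part of ARMISelLowering.cpp): a sketch of the bitfield
// insert identity behind case (1) above. When the constant fits entirely in
// the bits cleared by `mask` ((val & ~mask) == val), OR-ing it onto (A & mask)
// is a BFI: keep A's bits under mask and insert val into the rest. Names are
// invented for the illustration.
#include <cassert>
#include <cstdint>

uint32_t bfi_reference(uint32_t a, uint32_t val, uint32_t keepMask) {
  return (a & keepMask) | (val & ~keepMask);
}

void bfi_demo() {
  uint32_t A = 0x12345678u;
  uint32_t Mask = 0xffff00ffu;             // ~Mask is one contiguous bitfield
  uint32_t Val = 0x0000ab00u;              // fits entirely in ~Mask
  assert(((A & Mask) | Val) == bfi_reference(A, Val, Mask));
}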
14624
14625 static bool isValidMVECond(unsigned CC, bool IsFloat) {
14626 switch (CC) {
14627 case ARMCC::EQ:
14628 case ARMCC::NE:
14629 case ARMCC::LE:
14630 case ARMCC::GT:
14631 case ARMCC::GE:
14632 case ARMCC::LT:
14633 return true;
14634 case ARMCC::HS:
14635 case ARMCC::HI:
14636 return !IsFloat;
14637 default:
14638 return false;
14639 };
14640}
14641
14641
14642 static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14643 if (N->getOpcode() == ARMISD::VCMP)
14644 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14645 else if (N->getOpcode() == ARMISD::VCMPZ)
14646 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14647 else
14648 llvm_unreachable("Not a VCMP/VCMPZ!");
14649}
14650
14651 static bool CanInvertMVEVCMP(SDValue N) {
14652 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14653 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14654}
14655
14656 static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14657 const ARMSubtarget *Subtarget) {
14658 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14659 // together with predicates
14660 EVT VT = N->getValueType(0);
14661 SDLoc DL(N);
14662 SDValue N0 = N->getOperand(0);
14663 SDValue N1 = N->getOperand(1);
14664
14665 auto IsFreelyInvertable = [&](SDValue V) {
14666 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14667 return CanInvertMVEVCMP(V);
14668 return false;
14669 };
14670
14671 // At least one operand must be freely invertible.
14672 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14673 return SDValue();
14674
14675 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14676 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14677 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14678 return DAG.getLogicalNOT(DL, And, VT);
14679}
14680
14681/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14682 static SDValue PerformORCombine(SDNode *N,
14683 TargetLowering::DAGCombinerInfo &DCI,
14684 const ARMSubtarget *Subtarget) {
14685 // Attempt to use immediate-form VORR
14686 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14687 SDLoc dl(N);
14688 EVT VT = N->getValueType(0);
14689 SelectionDAG &DAG = DCI.DAG;
14690
14691 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14692 return SDValue();
14693
14694 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14695 VT == MVT::v8i1 || VT == MVT::v16i1))
14696 return PerformORCombine_i1(N, DAG, Subtarget);
14697
14698 APInt SplatBits, SplatUndef;
14699 unsigned SplatBitSize;
14700 bool HasAnyUndefs;
14701 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14702 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14703 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14704 SplatBitSize == 64) {
14705 EVT VorrVT;
14706 SDValue Val =
14707 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14708 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14709 if (Val.getNode()) {
14710 SDValue Input =
14711 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14712 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14713 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14714 }
14715 }
14716 }
14717
14718 if (!Subtarget->isThumb1Only()) {
14719 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14720 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14721 return Result;
14722 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14723 return Result;
14724 }
14725
14726 SDValue N0 = N->getOperand(0);
14727 SDValue N1 = N->getOperand(1);
14728
14729 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
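// (VBSP performs a per-bit select: bits set in its first operand take the
// corresponding bit from the second operand, clear bits take it from the
// third.)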
14730 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14731 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14732
14733 // The code below optimizes (or (and X, Y), Z).
14734 // The AND operand needs to have a single user to make these optimizations
14735 // profitable.
14736 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14737 return SDValue();
14738
14739 APInt SplatUndef;
14740 unsigned SplatBitSize;
14741 bool HasAnyUndefs;
14742
14743 APInt SplatBits0, SplatBits1;
14744 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14745 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14746 // Ensure that the second operands of both ANDs are constants
14747 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14748 HasAnyUndefs) && !HasAnyUndefs) {
14749 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14750 HasAnyUndefs) && !HasAnyUndefs) {
14751 // Ensure that the bit widths of the constants are the same and that
14752 // the splat arguments are logical inverses as per the pattern we
14753 // are trying to simplify.
14754 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14755 SplatBits0 == ~SplatBits1) {
14756 // Canonicalize the vector type to make instruction selection
14757 // simpler.
14758 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14759 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14760 N0->getOperand(1),
14761 N0->getOperand(0),
14762 N1->getOperand(0));
14763 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
14764 }
14765 }
14766 }
14767 }
14768
14769 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14770 // reasonable.
14771 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14772 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14773 return Res;
14774 }
14775
14776 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14777 return Result;
14778
14779 return SDValue();
14780}
14781
14782 static SDValue PerformXORCombine(SDNode *N,
14783 TargetLowering::DAGCombinerInfo &DCI,
14784 const ARMSubtarget *Subtarget) {
14785 EVT VT = N->getValueType(0);
14786 SelectionDAG &DAG = DCI.DAG;
14787
14788 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14789 return SDValue();
14790
14791 if (!Subtarget->isThumb1Only()) {
14792 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14793 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14794 return Result;
14795
14796 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14797 return Result;
14798 }
14799
14800 if (Subtarget->hasMVEIntegerOps()) {
14801 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14802 SDValue N0 = N->getOperand(0);
14803 SDValue N1 = N->getOperand(1);
14804 const TargetLowering *TLI = Subtarget->getTargetLowering();
14805 if (TLI->isConstTrueVal(N1) &&
14806 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14807 if (CanInvertMVEVCMP(N0)) {
14808 SDLoc DL(N0);
14809 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14810
14811 SmallVector<SDValue, 4> Ops;
14812 Ops.push_back(N0->getOperand(0));
14813 if (N0->getOpcode() == ARMISD::VCMP)
14814 Ops.push_back(N0->getOperand(1));
14815 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14816 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14817 }
14818 }
14819 }
14820
14821 return SDValue();
14822}
14823
14824// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14825// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14826// their position in "to" (Rd).
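// For example, if the BFI's mask operand is 0xff00ffff (so it writes bits
// [23:16] of "to"), ToMask is 0x00ff0000 and FromMask is 0x000000ff.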
14827static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14828 assert(N->getOpcode() == ARMISD::BFI);
14829
14830 SDValue From = N->getOperand(1);
14831 ToMask = ~N->getConstantOperandAPInt(2);
14832 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14833
14834 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14835 // #C in the base of the SHR.
14836 if (From->getOpcode() == ISD::SRL &&
14837 isa<ConstantSDNode>(From->getOperand(1))) {
14838 APInt Shift = From->getConstantOperandAPInt(1);
14839 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14840 FromMask <<= Shift.getLimitedValue(31);
14841 From = From->getOperand(0);
14842 }
14843
14844 return From;
14845}
14846
14847// If A and B contain one contiguous set of bits, does A | B == A . B?
14848//
14849 // Neither A nor B may be zero.
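// For example, A = 0b1100 and B = 0b0011 concatenate properly (the lowest set
// bit of A is exactly one above the highest set bit of B), while A = 0b1100
// and B = 0b0001 do not.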
14850static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14851 unsigned LastActiveBitInA = A.countr_zero();
14852 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14853 return LastActiveBitInA - 1 == FirstActiveBitInB;
14854}
14855
14856 static SDValue FindBFIToCombineWith(SDNode *N) {
14857 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14858 APInt ToMask, FromMask;
14859 SDValue From = ParseBFI(N, ToMask, FromMask);
14860 SDValue To = N->getOperand(0);
14861
14862 SDValue V = To;
14863 if (V.getOpcode() != ARMISD::BFI)
14864 return SDValue();
14865
14866 APInt NewToMask, NewFromMask;
14867 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14868 if (NewFrom != From)
14869 return SDValue();
14870
14871 // Do the written bits conflict with any we've seen so far?
14872 if ((NewToMask & ToMask).getBoolValue())
14873 // Conflicting bits.
14874 return SDValue();
14875
14876 // Are the new bits contiguous when combined with the old bits?
14877 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14878 BitsProperlyConcatenate(FromMask, NewFromMask))
14879 return V;
14880 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14881 BitsProperlyConcatenate(NewFromMask, FromMask))
14882 return V;
14883
14884 return SDValue();
14885}
14886
14887 static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14888 SDValue N0 = N->getOperand(0);
14889 SDValue N1 = N->getOperand(1);
14890
14891 if (N1.getOpcode() == ISD::AND) {
14892 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14893 // the bits being cleared by the AND are not demanded by the BFI.
14894 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14895 if (!N11C)
14896 return SDValue();
14897 unsigned InvMask = N->getConstantOperandVal(2);
14898 unsigned LSB = llvm::countr_zero(~InvMask);
14899 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14900 assert(Width <
14901 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14902 "undefined behavior");
14903 unsigned Mask = (1u << Width) - 1;
14904 unsigned Mask2 = N11C->getZExtValue();
14905 if ((Mask & (~Mask2)) == 0)
14906 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14907 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14908 return SDValue();
14909 }
14910
14911 // Look for another BFI to combine with.
14912 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14913 // We've found a BFI.
14914 APInt ToMask1, FromMask1;
14915 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14916
14917 APInt ToMask2, FromMask2;
14918 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14919 assert(From1 == From2);
14920 (void)From2;
14921
14922 // Create a new BFI, combining the two together.
14923 APInt NewFromMask = FromMask1 | FromMask2;
14924 APInt NewToMask = ToMask1 | ToMask2;
14925
14926 EVT VT = N->getValueType(0);
14927 SDLoc dl(N);
14928
14929 if (NewFromMask[0] == 0)
14930 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14931 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14932 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14933 DAG.getConstant(~NewToMask, dl, VT));
14934 }
14935
14936 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14937 // that lower bit insertions are performed first, providing that M1 and M2
14938 // do not overlap. This can allow multiple BFI instructions to be combined
14939 // together by the other folds above.
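// (The leading-zero-count check ensures the outer BFI writes the lower field,
// so after the swap the lower insertion ends up innermost.)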
14940 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14941 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14942 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14943
14944 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14945 ToMask1.countl_zero() < ToMask2.countl_zero())
14946 return SDValue();
14947
14948 EVT VT = N->getValueType(0);
14949 SDLoc dl(N);
14950 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14951 N->getOperand(1), N->getOperand(2));
14952 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14953 N0.getOperand(2));
14954 }
14955
14956 return SDValue();
14957}
14958
14959// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14960// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
14961// return X if valid.
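// (Each of these forms materializes a 0/1 boolean from the condition and
// flags, so the outer compare-with-zero is just re-testing that condition;
// the callers below fold it away.)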
14962 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14963 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14964 return SDValue();
14965 SDValue CSInc = Cmp->getOperand(0);
14966
14967 // Ignore any `And 1` nodes that may not yet have been removed. We are
14968 // looking for a value that produces 1/0, so these have no effect on the
14969 // code.
14970 while (CSInc.getOpcode() == ISD::AND &&
14971 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
14972 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
14973 CSInc = CSInc.getOperand(0);
14974
14975 if (CSInc.getOpcode() == ARMISD::CSINC &&
14976 isNullConstant(CSInc.getOperand(0)) &&
14977 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14978 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14979 return CSInc.getOperand(3);
14980 }
14981 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
14982 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14983 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14984 return CSInc.getOperand(4);
14985 }
14986 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
14987 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
14988 CC = ARMCC::getOppositeCondition(
14989 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
14990 return CSInc.getOperand(4);
14991 }
14992 return SDValue();
14993}
14994
14995 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
14996 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
14997 // t92: glue = ARMISD::CMPZ t74, 0
14998 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
14999 // t96: glue = ARMISD::CMPZ t93, 0
15000 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15001 ARMCC::CondCodes Cond;
15002 if (SDValue C = IsCMPZCSINC(N, Cond))
15003 if (Cond == ARMCC::EQ)
15004 return C;
15005 return SDValue();
15006}
15007
15008 static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
15009 // Fold away an unnecessary CMPZ/CSINC
15010 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15011 // if C1==EQ -> CSXYZ A, B, C2, D
15012 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15013 ARMCC::CondCodes Cond;
15014 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15015 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15016 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15017 N->getOperand(1),
15018 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15019 if (N->getConstantOperandVal(2) == ARMCC::NE)
15020 return DAG.getNode(
15021 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15022 N->getOperand(1),
15023 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15024 }
15025 return SDValue();
15026}
15027
15028/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15029/// ARMISD::VMOVRRD.
15030 static SDValue PerformVMOVRRDCombine(SDNode *N,
15031 TargetLowering::DAGCombinerInfo &DCI,
15032 const ARMSubtarget *Subtarget) {
15033 // vmovrrd(vmovdrr x, y) -> x,y
15034 SDValue InDouble = N->getOperand(0);
15035 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15036 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15037
15038 // vmovrrd(load f64) -> (load i32), (load i32)
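// (The f64 load is split into two i32 loads at offsets 0 and 4, so both
// halves land directly in GPRs without a VFP round trip.)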
15039 SDNode *InNode = InDouble.getNode();
15040 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15041 InNode->getValueType(0) == MVT::f64 &&
15042 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15043 !cast<LoadSDNode>(InNode)->isVolatile()) {
15044 // TODO: Should this be done for non-FrameIndex operands?
15045 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15046
15047 SelectionDAG &DAG = DCI.DAG;
15048 SDLoc DL(LD);
15049 SDValue BasePtr = LD->getBasePtr();
15050 SDValue NewLD1 =
15051 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15052 LD->getAlign(), LD->getMemOperand()->getFlags());
15053
15054 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15055 DAG.getConstant(4, DL, MVT::i32));
15056
15057 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15058 LD->getPointerInfo().getWithOffset(4),
15059 commonAlignment(LD->getAlign(), 4),
15060 LD->getMemOperand()->getFlags());
15061
15062 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15063 if (DCI.DAG.getDataLayout().isBigEndian())
15064 std::swap (NewLD1, NewLD2);
15065 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15066 return Result;
15067 }
15068
15069 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15070 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15071 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15072 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15073 SDValue BV = InDouble.getOperand(0);
15074 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15075 // change lane order under big endian.
15076 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15077 while (
15078 (BV.getOpcode() == ISD::BITCAST ||
15079 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15080 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15081 BVSwap = BV.getOpcode() == ISD::BITCAST;
15082 BV = BV.getOperand(0);
15083 }
15084 if (BV.getValueType() != MVT::v4i32)
15085 return SDValue();
15086
15087 // Handle buildvectors, pulling out the correct lane depending on
15088 // endianness.
15089 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15090 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15091 SDValue Op0 = BV.getOperand(Offset);
15092 SDValue Op1 = BV.getOperand(Offset + 1);
15093 if (!Subtarget->isLittle() && BVSwap)
15094 std::swap(Op0, Op1);
15095
15096 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15097 }
15098
15099 // A chain of insert_vectors, grabbing the correct value of the chain of
15100 // inserts.
15101 SDValue Op0, Op1;
15102 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15103 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15104 if (BV.getConstantOperandVal(2) == Offset)
15105 Op0 = BV.getOperand(1);
15106 if (BV.getConstantOperandVal(2) == Offset + 1)
15107 Op1 = BV.getOperand(1);
15108 }
15109 BV = BV.getOperand(0);
15110 }
15111 if (!Subtarget->isLittle() && BVSwap)
15112 std::swap(Op0, Op1);
15113 if (Op0 && Op1)
15114 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15115 }
15116
15117 return SDValue();
15118}
15119
15120/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15121/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15122 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15123 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15124 SDValue Op0 = N->getOperand(0);
15125 SDValue Op1 = N->getOperand(1);
15126 if (Op0.getOpcode() == ISD::BITCAST)
15127 Op0 = Op0.getOperand(0);
15128 if (Op1.getOpcode() == ISD::BITCAST)
15129 Op1 = Op1.getOperand(0);
15130 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15131 Op0.getNode() == Op1.getNode() &&
15132 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15133 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15134 N->getValueType(0), Op0.getOperand(0));
15135 return SDValue();
15136}
15137
15138 static SDValue PerformVMOVhrCombine(SDNode *N,
15139 TargetLowering::DAGCombinerInfo &DCI) {
15140 SDValue Op0 = N->getOperand(0);
15141
15142 // VMOVhr (VMOVrh (X)) -> X
15143 if (Op0->getOpcode() == ARMISD::VMOVrh)
15144 return Op0->getOperand(0);
15145
15146 // FullFP16: half values are passed in S-registers, and we don't
15147 // need any of the bitcast and moves:
15148 //
15149 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15150 // t5: i32 = bitcast t2
15151 // t18: f16 = ARMISD::VMOVhr t5
15152 // =>
15153 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15154 if (Op0->getOpcode() == ISD::BITCAST) {
15155 SDValue Copy = Op0->getOperand(0);
15156 if (Copy.getValueType() == MVT::f32 &&
15157 Copy->getOpcode() == ISD::CopyFromReg) {
15158 bool HasGlue = Copy->getNumOperands() == 3;
15159 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15160 HasGlue ? Copy->getOperand(2) : SDValue()};
15161 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15162 SDValue NewCopy =
15163 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15164 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15165 ArrayRef(Ops, HasGlue ? 3 : 2));
15166
15167 // Update Users, Chains, and Potential Glue.
15168 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15169 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15170 if (HasGlue)
15171 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15172 NewCopy.getValue(2));
15173
15174 return NewCopy;
15175 }
15176 }
15177
15178 // fold (VMOVhr (load x)) -> (load (f16*)x)
15179 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15180 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15181 LN0->getMemoryVT() == MVT::i16) {
15182 SDValue Load =
15183 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15184 LN0->getBasePtr(), LN0->getMemOperand());
15185 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15186 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15187 return Load;
15188 }
15189 }
15190
15191 // Only the bottom 16 bits of the source register are used.
15192 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15193 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15194 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15195 return SDValue(N, 0);
15196
15197 return SDValue();
15198}
15199
15200 static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15201 SDValue N0 = N->getOperand(0);
15202 EVT VT = N->getValueType(0);
15203
15204 // fold (VMOVrh (fpconst x)) -> const x
15205 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15206 APFloat V = C->getValueAPF();
15207 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15208 }
15209
15210 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15211 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15212 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15213
15214 SDValue Load =
15215 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15216 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15217 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15218 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15219 return Load;
15220 }
15221
15222 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15223 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15224 isa<ConstantSDNode>(N0->getOperand(1)))
15225 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15226 N0->getOperand(1));
15227
15228 return SDValue();
15229}
15230
15231/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15232/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15233/// i64 vector to have f64 elements, since the value can then be loaded
15234/// directly into a VFP register.
15235 static bool hasNormalLoadOperand(SDNode *N) {
15236 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15237 for (unsigned i = 0; i < NumElts; ++i) {
15238 SDNode *Elt = N->getOperand(i).getNode();
15239 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15240 return true;
15241 }
15242 return false;
15243}
15244
15245/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15246/// ISD::BUILD_VECTOR.
15247 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15248 TargetLowering::DAGCombinerInfo &DCI,
15249 const ARMSubtarget *Subtarget) {
15250 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15251 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15252 // into a pair of GPRs, which is fine when the value is used as a scalar,
15253 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15254 SelectionDAG &DAG = DCI.DAG;
15255 if (N->getNumOperands() == 2)
15256 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15257 return RV;
15258
15259 // Load i64 elements as f64 values so that type legalization does not split
15260 // them up into i32 values.
15261 EVT VT = N->getValueType(0);
15262 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15263 return SDValue();
15264 SDLoc dl(N);
15265 SmallVector<SDValue, 8> Ops;
15266 unsigned NumElts = VT.getVectorNumElements();
15267 for (unsigned i = 0; i < NumElts; ++i) {
15268 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15269 Ops.push_back(V);
15270 // Make the DAGCombiner fold the bitcast.
15271 DCI.AddToWorklist(V.getNode());
15272 }
15273 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15274 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15275 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15276}
15277
15278/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15279 static SDValue
15280 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15281 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15282 // At that time, we may have inserted bitcasts from integer to float.
15283 // If these bitcasts have survived DAGCombine, change the lowering of this
15284 // BUILD_VECTOR into something more vector friendly, i.e., that does not
15285 // force to use floating point types.
15286
15287 // Make sure we can change the type of the vector.
15288 // This is possible iff:
15289 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15290 // 1.1. Vector is used only once.
15291 // 1.2. Use is a bit convert to an integer type.
15292 // 2. The size of its operands are 32-bits (64-bits are not legal).
15293 EVT VT = N->getValueType(0);
15294 EVT EltVT = VT.getVectorElementType();
15295
15296 // Check 1.1. and 2.
15297 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15298 return SDValue();
15299
15300 // By construction, the input type must be float.
15301 assert(EltVT == MVT::f32 && "Unexpected type!");
15302
15303 // Check 1.2.
15304 SDNode *Use = *N->use_begin();
15305 if (Use->getOpcode() != ISD::BITCAST ||
15306 Use->getValueType(0).isFloatingPoint())
15307 return SDValue();
15308
15309 // Check profitability.
15310 // Model is, if more than half of the relevant operands are bitcast from
15311 // i32, turn the build_vector into a sequence of insert_vector_elt.
15312 // Relevant operands are everything that is not statically
15313 // (i.e., at compile time) bitcasted.
15314 unsigned NumOfBitCastedElts = 0;
15315 unsigned NumElts = VT.getVectorNumElements();
15316 unsigned NumOfRelevantElts = NumElts;
15317 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15318 SDValue Elt = N->getOperand(Idx);
15319 if (Elt->getOpcode() == ISD::BITCAST) {
15320 // Assume only bit cast to i32 will go away.
15321 if (Elt->getOperand(0).getValueType() == MVT::i32)
15322 ++NumOfBitCastedElts;
15323 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15324 // Constants are statically casted, thus do not count them as
15325 // relevant operands.
15326 --NumOfRelevantElts;
15327 }
15328
15329 // Check if more than half of the elements require a non-free bitcast.
15330 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15331 return SDValue();
15332
15333 SelectionDAG &DAG = DCI.DAG;
15334 // Create the new vector type.
15335 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15336 // Check if the type is legal.
15337 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15338 if (!TLI.isTypeLegal(VecVT))
15339 return SDValue();
15340
15341 // Combine:
15342 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15343 // => BITCAST INSERT_VECTOR_ELT
15344 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15345 // (BITCAST EN), N.
15346 SDValue Vec = DAG.getUNDEF(VecVT);
15347 SDLoc dl(N);
15348 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15349 SDValue V = N->getOperand(Idx);
15350 if (V.isUndef())
15351 continue;
15352 if (V.getOpcode() == ISD::BITCAST &&
15353 V->getOperand(0).getValueType() == MVT::i32)
15354 // Fold obvious case.
15355 V = V.getOperand(0);
15356 else {
15357 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15358 // Make the DAGCombiner fold the bitcasts.
15359 DCI.AddToWorklist(V.getNode());
15360 }
15361 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15362 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15363 }
15364 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15365 // Make the DAGCombiner fold the bitcasts.
15366 DCI.AddToWorklist(Vec.getNode());
15367 return Vec;
15368}
15369
15370 static SDValue
15371 PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15372 EVT VT = N->getValueType(0);
15373 SDValue Op = N->getOperand(0);
15374 SDLoc dl(N);
15375
15376 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15377 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15378 // If the valuetypes are the same, we can remove the cast entirely.
15379 if (Op->getOperand(0).getValueType() == VT)
15380 return Op->getOperand(0);
15381 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15382 }
15383
15384 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15385 // more VPNOT which might get folded as else predicates.
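// (An all-ones i32 source becomes the all-true predicate; 0xffff is used
// because only the bottom 16 bits of the predicate register are meaningful.)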
15386 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15387 SDValue X =
15388 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15389 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15390 DCI.DAG.getConstant(65535, dl, MVT::i32));
15391 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15392 }
15393
15394 // Only the bottom 16 bits of the source register are used.
15395 if (Op.getValueType() == MVT::i32) {
15396 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15397 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15398 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15399 return SDValue(N, 0);
15400 }
15401 return SDValue();
15402}
15403
15404 static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15405 const ARMSubtarget *ST) {
15406 EVT VT = N->getValueType(0);
15407 SDValue Op = N->getOperand(0);
15408 SDLoc dl(N);
15409
15410 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15411 if (ST->isLittle())
15412 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15413
15414 // VECTOR_REG_CAST undef -> undef
15415 if (Op.isUndef())
15416 return DAG.getUNDEF(VT);
15417
15418 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15419 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15420 // If the valuetypes are the same, we can remove the cast entirely.
15421 if (Op->getOperand(0).getValueType() == VT)
15422 return Op->getOperand(0);
15423 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15424 }
15425
15426 return SDValue();
15427}
15428
15429 static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15430 const ARMSubtarget *Subtarget) {
15431 if (!Subtarget->hasMVEIntegerOps())
15432 return SDValue();
15433
15434 EVT VT = N->getValueType(0);
15435 SDValue Op0 = N->getOperand(0);
15436 SDValue Op1 = N->getOperand(1);
15437 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15438 SDLoc dl(N);
15439
15440 // vcmp X, 0, cc -> vcmpz X, cc
15441 if (isZeroVector(Op1))
15442 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15443
15444 unsigned SwappedCond = getSwappedCondition(Cond);
15445 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15446 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15447 if (isZeroVector(Op0))
15448 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15449 DAG.getConstant(SwappedCond, dl, MVT::i32));
15450 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15451 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15452 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15453 DAG.getConstant(SwappedCond, dl, MVT::i32));
15454 }
15455
15456 return SDValue();
15457}
15458
15459/// PerformInsertEltCombine - Target-specific dag combine xforms for
15460/// ISD::INSERT_VECTOR_ELT.
15461 static SDValue PerformInsertEltCombine(SDNode *N,
15462 TargetLowering::DAGCombinerInfo &DCI) {
15463 // Bitcast an i64 load inserted into a vector to f64.
15464 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15465 EVT VT = N->getValueType(0);
15466 SDNode *Elt = N->getOperand(1).getNode();
15467 if (VT.getVectorElementType() != MVT::i64 ||
15468 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15469 return SDValue();
15470
15471 SelectionDAG &DAG = DCI.DAG;
15472 SDLoc dl(N);
15473 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15474 VT.getVectorNumElements());
15475 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15476 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15477 // Make the DAGCombiner fold the bitcasts.
15478 DCI.AddToWorklist(Vec.getNode());
15479 DCI.AddToWorklist(V.getNode());
15480 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15481 Vec, V, N->getOperand(2));
15482 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15483}
15484
15485// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15486// directly or bitcast to an integer if the original is a float vector.
15487// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15488// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15489 static SDValue
15490 PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15491 EVT VT = N->getValueType(0);
15492 SDLoc dl(N);
15493
15494 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15495 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15496 return SDValue();
15497
15498 SDValue Ext = SDValue(N, 0);
15499 if (Ext.getOpcode() == ISD::BITCAST &&
15500 Ext.getOperand(0).getValueType() == MVT::f32)
15501 Ext = Ext.getOperand(0);
15502 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15503 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15504 Ext.getConstantOperandVal(1) % 2 != 0)
15505 return SDValue();
15506 if (Ext->use_size() == 1 &&
15507 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15508 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15509 return SDValue();
15510
15511 SDValue Op0 = Ext.getOperand(0);
15512 EVT VecVT = Op0.getValueType();
15513 unsigned ResNo = Op0.getResNo();
15514 unsigned Lane = Ext.getConstantOperandVal(1);
15515 if (VecVT.getVectorNumElements() != 4)
15516 return SDValue();
15517
15518 // Find another extract, of Lane + 1
15519 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15520 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15521 isa<ConstantSDNode>(V->getOperand(1)) &&
15522 V->getConstantOperandVal(1) == Lane + 1 &&
15523 V->getOperand(0).getResNo() == ResNo;
15524 });
15525 if (OtherIt == Op0->uses().end())
15526 return SDValue();
15527
15528 // For float extracts, we need to be converting to a i32 for both vector
15529 // lanes.
15530 SDValue OtherExt(*OtherIt, 0);
15531 if (OtherExt.getValueType() != MVT::i32) {
15532 if (OtherExt->use_size() != 1 ||
15533 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15534 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15535 return SDValue();
15536 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15537 }
15538
15539 // Convert the type to a f64 and extract with a VMOVRRD.
15540 SDValue F64 = DCI.DAG.getNode(
15541 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15542 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15543 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15544 SDValue VMOVRRD =
15545 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15546
15547 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15548 return VMOVRRD;
15549}
15550
15551 static SDValue PerformExtractEltCombine(SDNode *N,
15552 TargetLowering::DAGCombinerInfo &DCI,
15553 const ARMSubtarget *ST) {
15554 SDValue Op0 = N->getOperand(0);
15555 EVT VT = N->getValueType(0);
15556 SDLoc dl(N);
15557
15558 // extract (vdup x) -> x
15559 if (Op0->getOpcode() == ARMISD::VDUP) {
15560 SDValue X = Op0->getOperand(0);
15561 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15562 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15563 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15564 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15565 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15566 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15567
15568 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15569 X = X->getOperand(0);
15570 if (X.getValueType() == VT)
15571 return X;
15572 }
15573
15574 // extract ARM_BUILD_VECTOR -> x
15575 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15576 isa<ConstantSDNode>(N->getOperand(1)) &&
15577 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15578 return Op0.getOperand(N->getConstantOperandVal(1));
15579 }
15580
15581 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15582 if (Op0.getValueType() == MVT::v4i32 &&
15583 isa<ConstantSDNode>(N->getOperand(1)) &&
15584 Op0.getOpcode() == ISD::BITCAST &&
15585 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15586 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15587 SDValue BV = Op0.getOperand(0);
15588 unsigned Offset = N->getConstantOperandVal(1);
15589 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15590 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15591 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15592 }
15593
15594 // extract x, n; extract x, n+1 -> VMOVRRD x
15595 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15596 return R;
15597
15598 // extract (MVETrunc(x)) -> extract x
15599 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15600 unsigned Idx = N->getConstantOperandVal(1);
15601 unsigned Vec =
15602 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15603 unsigned SubIdx =
15604 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15606 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15607 }
15608
15609 return SDValue();
15610}
15611
15612 static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15613 SDValue Op = N->getOperand(0);
15614 EVT VT = N->getValueType(0);
15615
15616 // sext_inreg(VGETLANEu) -> VGETLANEs
15617 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15618 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15619 Op.getOperand(0).getValueType().getScalarType())
15620 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15621 Op.getOperand(1));
15622
15623 return SDValue();
15624}
15625
15626 static SDValue
15627 PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15628 SDValue Vec = N->getOperand(0);
15629 SDValue SubVec = N->getOperand(1);
15630 uint64_t IdxVal = N->getConstantOperandVal(2);
15631 EVT VecVT = Vec.getValueType();
15632 EVT SubVT = SubVec.getValueType();
15633
15634 // Only do this for legal fixed vector types.
15635 if (!VecVT.isFixedLengthVector() ||
15636 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15637 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15638 return SDValue();
15639
15640 // Ignore widening patterns.
15641 if (IdxVal == 0 && Vec.isUndef())
15642 return SDValue();
15643
15644 // Subvector must be half the width and an "aligned" insertion.
15645 unsigned NumSubElts = SubVT.getVectorNumElements();
15646 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15647 (IdxVal != 0 && IdxVal != NumSubElts))
15648 return SDValue();
15649
15650 // Fold insert_subvector -> concat_vectors
15651 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15652 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15653 SDLoc DL(N);
15654 SDValue Lo, Hi;
15655 if (IdxVal == 0) {
15656 Lo = SubVec;
15657 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15658 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15659 } else {
15660 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15661 DCI.DAG.getVectorIdxConstant(0, DL));
15662 Hi = SubVec;
15663 }
15664 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15665}
15666
15667// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15668 static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15669 SelectionDAG &DAG) {
15670 SDValue Trunc = N->getOperand(0);
15671 EVT VT = Trunc.getValueType();
15672 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15673 return SDValue();
15674
15675 SDLoc DL(Trunc);
15676 if (isVMOVNTruncMask(N->getMask(), VT, false))
15677 return DAG.getNode(
15678 ARMISD::VMOVN, DL, VT,
15679 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15680 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15681 DAG.getConstant(1, DL, MVT::i32));
15682 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15683 return DAG.getNode(
15684 ARMISD::VMOVN, DL, VT,
15685 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15686 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15687 DAG.getConstant(1, DL, MVT::i32));
15688 return SDValue();
15689}
15690
15691/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15692/// ISD::VECTOR_SHUFFLE.
15693 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15694 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15695 return R;
15696
15697 // The LLVM shufflevector instruction does not require the shuffle mask
15698 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15699 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15700 // operands do not match the mask length, they are extended by concatenating
15701 // them with undef vectors. That is probably the right thing for other
15702 // targets, but for NEON it is better to concatenate two double-register
15703 // size vector operands into a single quad-register size vector. Do that
15704 // transformation here:
15705 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15706 // shuffle(concat(v1, v2), undef)
15707 SDValue Op0 = N->getOperand(0);
15708 SDValue Op1 = N->getOperand(1);
15709 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15710 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15711 Op0.getNumOperands() != 2 ||
15712 Op1.getNumOperands() != 2)
15713 return SDValue();
15714 SDValue Concat0Op1 = Op0.getOperand(1);
15715 SDValue Concat1Op1 = Op1.getOperand(1);
15716 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15717 return SDValue();
15718 // Skip the transformation if any of the types are illegal.
15719 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15720 EVT VT = N->getValueType(0);
15721 if (!TLI.isTypeLegal(VT) ||
15722 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15723 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15724 return SDValue();
15725
15726 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15727 Op0.getOperand(0), Op1.getOperand(0));
15728 // Translate the shuffle mask.
15729 SmallVector<int, 16> NewMask;
15730 unsigned NumElts = VT.getVectorNumElements();
15731 unsigned HalfElts = NumElts/2;
15732 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15733 for (unsigned n = 0; n < NumElts; ++n) {
15734 int MaskElt = SVN->getMaskElt(n);
15735 int NewElt = -1;
15736 if (MaskElt < (int)HalfElts)
15737 NewElt = MaskElt;
15738 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15739 NewElt = HalfElts + MaskElt - NumElts;
15740 NewMask.push_back(NewElt);
15741 }
15742 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15743 DAG.getUNDEF(VT), NewMask);
15744}
15745
15746 /// Load/store instruction that can be merged with a base address
15747 /// update
15748 struct BaseUpdateTarget {
15749 SDNode *N;
15750 bool isIntrinsic;
15751 bool isStore;
15752 unsigned AddrOpIdx;
15753 };
15754
15755 struct BaseUpdateUser {
15756 /// Instruction that updates a pointer
15757 SDNode *N;
15758 /// Pointer increment operand
15759 SDValue Inc;
15760 /// Pointer increment value if it is a constant, or 0 otherwise
15761 unsigned ConstInc;
15762 };
15763
15764 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15765 struct BaseUpdateUser &User,
15766 bool SimpleConstIncOnly,
15767 TargetLowering::DAGCombinerInfo &DCI) {
15768 SelectionDAG &DAG = DCI.DAG;
15769 SDNode *N = Target.N;
15770 MemSDNode *MemN = cast<MemSDNode>(N);
15771 SDLoc dl(N);
15772
15773 // Find the new opcode for the updating load/store.
15774 bool isLoadOp = true;
15775 bool isLaneOp = false;
15776 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15777 // as an operand.
15778 bool hasAlignment = true;
15779 unsigned NewOpc = 0;
15780 unsigned NumVecs = 0;
15781 if (Target.isIntrinsic) {
15782 unsigned IntNo = N->getConstantOperandVal(1);
15783 switch (IntNo) {
15784 default:
15785 llvm_unreachable("unexpected intrinsic for Neon base update");
15786 case Intrinsic::arm_neon_vld1:
15787 NewOpc = ARMISD::VLD1_UPD;
15788 NumVecs = 1;
15789 break;
15790 case Intrinsic::arm_neon_vld2:
15791 NewOpc = ARMISD::VLD2_UPD;
15792 NumVecs = 2;
15793 break;
15794 case Intrinsic::arm_neon_vld3:
15795 NewOpc = ARMISD::VLD3_UPD;
15796 NumVecs = 3;
15797 break;
15798 case Intrinsic::arm_neon_vld4:
15799 NewOpc = ARMISD::VLD4_UPD;
15800 NumVecs = 4;
15801 break;
15802 case Intrinsic::arm_neon_vld1x2:
15803 NewOpc = ARMISD::VLD1x2_UPD;
15804 NumVecs = 2;
15805 hasAlignment = false;
15806 break;
15807 case Intrinsic::arm_neon_vld1x3:
15808 NewOpc = ARMISD::VLD1x3_UPD;
15809 NumVecs = 3;
15810 hasAlignment = false;
15811 break;
15812 case Intrinsic::arm_neon_vld1x4:
15813 NewOpc = ARMISD::VLD1x4_UPD;
15814 NumVecs = 4;
15815 hasAlignment = false;
15816 break;
15817 case Intrinsic::arm_neon_vld2dup:
15818 NewOpc = ARMISD::VLD2DUP_UPD;
15819 NumVecs = 2;
15820 break;
15821 case Intrinsic::arm_neon_vld3dup:
15822 NewOpc = ARMISD::VLD3DUP_UPD;
15823 NumVecs = 3;
15824 break;
15825 case Intrinsic::arm_neon_vld4dup:
15826 NewOpc = ARMISD::VLD4DUP_UPD;
15827 NumVecs = 4;
15828 break;
15829 case Intrinsic::arm_neon_vld2lane:
15830 NewOpc = ARMISD::VLD2LN_UPD;
15831 NumVecs = 2;
15832 isLaneOp = true;
15833 break;
15834 case Intrinsic::arm_neon_vld3lane:
15835 NewOpc = ARMISD::VLD3LN_UPD;
15836 NumVecs = 3;
15837 isLaneOp = true;
15838 break;
15839 case Intrinsic::arm_neon_vld4lane:
15840 NewOpc = ARMISD::VLD4LN_UPD;
15841 NumVecs = 4;
15842 isLaneOp = true;
15843 break;
15844 case Intrinsic::arm_neon_vst1:
15845 NewOpc = ARMISD::VST1_UPD;
15846 NumVecs = 1;
15847 isLoadOp = false;
15848 break;
15849 case Intrinsic::arm_neon_vst2:
15850 NewOpc = ARMISD::VST2_UPD;
15851 NumVecs = 2;
15852 isLoadOp = false;
15853 break;
15854 case Intrinsic::arm_neon_vst3:
15855 NewOpc = ARMISD::VST3_UPD;
15856 NumVecs = 3;
15857 isLoadOp = false;
15858 break;
15859 case Intrinsic::arm_neon_vst4:
15860 NewOpc = ARMISD::VST4_UPD;
15861 NumVecs = 4;
15862 isLoadOp = false;
15863 break;
15864 case Intrinsic::arm_neon_vst2lane:
15865 NewOpc = ARMISD::VST2LN_UPD;
15866 NumVecs = 2;
15867 isLoadOp = false;
15868 isLaneOp = true;
15869 break;
15870 case Intrinsic::arm_neon_vst3lane:
15871 NewOpc = ARMISD::VST3LN_UPD;
15872 NumVecs = 3;
15873 isLoadOp = false;
15874 isLaneOp = true;
15875 break;
15876 case Intrinsic::arm_neon_vst4lane:
15877 NewOpc = ARMISD::VST4LN_UPD;
15878 NumVecs = 4;
15879 isLoadOp = false;
15880 isLaneOp = true;
15881 break;
15882 case Intrinsic::arm_neon_vst1x2:
15883 NewOpc = ARMISD::VST1x2_UPD;
15884 NumVecs = 2;
15885 isLoadOp = false;
15886 hasAlignment = false;
15887 break;
15888 case Intrinsic::arm_neon_vst1x3:
15889 NewOpc = ARMISD::VST1x3_UPD;
15890 NumVecs = 3;
15891 isLoadOp = false;
15892 hasAlignment = false;
15893 break;
15894 case Intrinsic::arm_neon_vst1x4:
15895 NewOpc = ARMISD::VST1x4_UPD;
15896 NumVecs = 4;
15897 isLoadOp = false;
15898 hasAlignment = false;
15899 break;
15900 }
15901 } else {
15902 isLaneOp = true;
15903 switch (N->getOpcode()) {
15904 default:
15905 llvm_unreachable("unexpected opcode for Neon base update");
15906 case ARMISD::VLD1DUP:
15907 NewOpc = ARMISD::VLD1DUP_UPD;
15908 NumVecs = 1;
15909 break;
15910 case ARMISD::VLD2DUP:
15911 NewOpc = ARMISD::VLD2DUP_UPD;
15912 NumVecs = 2;
15913 break;
15914 case ARMISD::VLD3DUP:
15915 NewOpc = ARMISD::VLD3DUP_UPD;
15916 NumVecs = 3;
15917 break;
15918 case ARMISD::VLD4DUP:
15919 NewOpc = ARMISD::VLD4DUP_UPD;
15920 NumVecs = 4;
15921 break;
15922 case ISD::LOAD:
15923 NewOpc = ARMISD::VLD1_UPD;
15924 NumVecs = 1;
15925 isLaneOp = false;
15926 break;
15927 case ISD::STORE:
15928 NewOpc = ARMISD::VST1_UPD;
15929 NumVecs = 1;
15930 isLaneOp = false;
15931 isLoadOp = false;
15932 break;
15933 }
15934 }
15935
15936 // Find the size of memory referenced by the load/store.
15937 EVT VecTy;
15938 if (isLoadOp) {
15939 VecTy = N->getValueType(0);
15940 } else if (Target.isIntrinsic) {
15941 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15942 } else {
15943 assert(Target.isStore &&
15944 "Node has to be a load, a store, or an intrinsic!");
15945 VecTy = N->getOperand(1).getValueType();
15946 }
15947
15948 bool isVLDDUPOp =
15949 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15950 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15951
15952 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15953 if (isLaneOp || isVLDDUPOp)
15954 NumBytes /= VecTy.getVectorNumElements();
15955
15956 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15957 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15958 // separate instructions that make it harder to use a non-constant update.
15959 return false;
15960 }
15961
15962 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15963 return false;
15964
15965 // OK, we found an ADD we can fold into the base update.
15966 // Now, create a _UPD node, taking care of not breaking alignment.
15967
15968 EVT AlignedVecTy = VecTy;
15969 Align Alignment = MemN->getAlign();
15970
15971 // If this is a less-than-standard-aligned load/store, change the type to
15972 // match the standard alignment.
15973 // The alignment is overlooked when selecting _UPD variants; and it's
15974 // easier to introduce bitcasts here than fix that.
15975 // There are 3 ways to get to this base-update combine:
15976 // - intrinsics: they are assumed to be properly aligned (to the standard
15977 // alignment of the memory type), so we don't need to do anything.
15978 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15979 // intrinsics, so, likewise, there's nothing to do.
15980 // - generic load/store instructions: the alignment is specified as an
15981 // explicit operand, rather than implicitly as the standard alignment
15982 // of the memory type (like the intrinsics). We need to change the
15983 // memory type to match the explicit alignment. That way, we don't
15984 // generate non-standard-aligned ARMISD::VLDx nodes.
15985 if (isa<LSBaseSDNode>(N)) {
15986 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15987 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15988 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15989 assert(!isLaneOp && "Unexpected generic load/store lane.");
15990 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15991 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15992 }
15993 // Don't set an explicit alignment on regular load/stores that we want
15994 // to transform to VLD/VST 1_UPD nodes.
15995 // This matches the behavior of regular load/stores, which only get an
15996 // explicit alignment if the MMO alignment is larger than the standard
15997 // alignment of the memory type.
15998 // Intrinsics, however, always get an explicit alignment, set to the
15999 // alignment of the MMO.
16000 Alignment = Align(1);
16001 }
16002
16003 // Create the new updating load/store node.
16004 // First, create an SDVTList for the new updating node's results.
16005 EVT Tys[6];
16006 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16007 unsigned n;
16008 for (n = 0; n < NumResultVecs; ++n)
16009 Tys[n] = AlignedVecTy;
16010 Tys[n++] = MVT::i32;
16011 Tys[n] = MVT::Other;
16012 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16013
16014 // Then, gather the new node's operands.
16015 SmallVector<SDValue, 8> Ops;
16016 Ops.push_back(N->getOperand(0)); // incoming chain
16017 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16018 Ops.push_back(User.Inc);
16019
16020 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16021 // Try to match the intrinsic's signature
16022 Ops.push_back(StN->getValue());
16023 } else {
16024 // Loads (and of course intrinsics) match the intrinsics' signature,
16025 // so just add all but the alignment operand.
16026 unsigned LastOperand =
16027 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16028 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16029 Ops.push_back(N->getOperand(i));
16030 }
16031
16032 // For all node types, the alignment operand is always the last one.
16033 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16034
16035 // If this is a non-standard-aligned STORE, the penultimate operand is the
16036 // stored value. Bitcast it to the aligned type.
16037 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16038 SDValue &StVal = Ops[Ops.size() - 2];
16039 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16040 }
16041
16042 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16043 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16044 MemN->getMemOperand());
16045
16046 // Update the uses.
16047 SmallVector<SDValue, 5> NewResults;
16048 for (unsigned i = 0; i < NumResultVecs; ++i)
16049 NewResults.push_back(SDValue(UpdN.getNode(), i));
16050
16051 // If this is a non-standard-aligned LOAD, the first result is the loaded
16052 // value. Bitcast it to the expected result type.
16053 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16054 SDValue &LdVal = NewResults[0];
16055 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16056 }
16057
16058 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16059 DCI.CombineTo(N, NewResults);
16060 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16061
16062 return true;
16063}
16064
16065 // If (opcode ptr inc) is an ADD-like instruction, return the
16066// increment value. Otherwise return 0.
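// (ARMISD::VLD1_UPD is handled because the address may itself be the updated
// pointer produced by a previous post-incrementing load, which adds its
// constant increment just like an ADD.)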
16067static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16068 SDValue Inc, const SelectionDAG &DAG) {
16069 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16070 if (!CInc)
16071 return 0;
16072
16073 switch (Opcode) {
16074 case ARMISD::VLD1_UPD:
16075 case ISD::ADD:
16076 return CInc->getZExtValue();
16077 case ISD::OR: {
16078 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16079 // (OR ptr inc) is the same as (ADD ptr inc)
16080 return CInc->getZExtValue();
16081 }
16082 return 0;
16083 }
16084 default:
16085 return 0;
16086 }
16087}
16088
16089 static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16090 switch (N->getOpcode()) {
16091 case ISD::ADD:
16092 case ISD::OR: {
16093 if (isa<ConstantSDNode>(N->getOperand(1))) {
16094 *Ptr = N->getOperand(0);
16095 *CInc = N->getOperand(1);
16096 return true;
16097 }
16098 return false;
16099 }
16100 case ARMISD::VLD1_UPD: {
16101 if (isa<ConstantSDNode>(N->getOperand(2))) {
16102 *Ptr = N->getOperand(1);
16103 *CInc = N->getOperand(2);
16104 return true;
16105 }
16106 return false;
16107 }
16108 default:
16109 return false;
16110 }
16111}
16112
16113 static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16114 // Check that the add is independent of the load/store.
16115 // Otherwise, folding it would create a cycle. Search through Addr
16116 // as well, since the User may not be a direct user of Addr and
16117 // only share a base pointer.
16118 SmallPtrSet<const SDNode *, 32> Visited;
16119 SmallVector<const SDNode *, 16> Worklist;
16120 Worklist.push_back(N);
16121 Worklist.push_back(User);
16122 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16123 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16124 return false;
16125 return true;
16126}
16127
16128/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16129/// NEON load/store intrinsics, and generic vector load/stores, to merge
16130/// base address updates.
16131/// For generic load/stores, the memory type is assumed to be a vector.
16132/// The caller is assumed to have checked legality.
16133 static SDValue CombineBaseUpdate(SDNode *N,
16134 TargetLowering::DAGCombinerInfo &DCI) {
16135 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16136 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16137 const bool isStore = N->getOpcode() == ISD::STORE;
16138 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16139 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16140
16141 SDValue Addr = N->getOperand(AddrOpIdx);
16142
16143 SmallVector<BaseUpdateUser, 8> BaseUpdates;
16144
16145 // Search for a use of the address operand that is an increment.
16146 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16147 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16148 SDNode *User = *UI;
16149 if (UI.getUse().getResNo() != Addr.getResNo() ||
16150 User->getNumOperands() != 2)
16151 continue;
16152
16153 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16154 unsigned ConstInc =
16155 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16156
16157 if (ConstInc || User->getOpcode() == ISD::ADD)
16158 BaseUpdates.push_back({User, Inc, ConstInc});
16159 }
16160
16161 // If the address is a constant pointer increment itself, find
16162 // another constant increment that has the same base operand
16163 SDValue Base;
16164 SDValue CInc;
16165 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16166 unsigned Offset =
16167 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16168 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16169 UI != UE; ++UI) {
16170
16171 SDNode *User = *UI;
16172 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16173 User->getNumOperands() != 2)
16174 continue;
16175
16176 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16177 unsigned UserOffset =
16178 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16179
16180 if (!UserOffset || UserOffset <= Offset)
16181 continue;
16182
16183 unsigned NewConstInc = UserOffset - Offset;
16184 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16185 BaseUpdates.push_back({User, NewInc, NewConstInc});
16186 }
16187 }
16188
16189 // Try to fold the load/store with an update that matches memory
16190 // access size. This should work well for sequential loads.
16191 //
16192 // Filter out invalid updates as well.
16193 unsigned NumValidUpd = BaseUpdates.size();
16194 for (unsigned I = 0; I < NumValidUpd;) {
16195 BaseUpdateUser &User = BaseUpdates[I];
16196 if (!isValidBaseUpdate(N, User.N)) {
16197 --NumValidUpd;
16198 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16199 continue;
16200 }
16201
16202 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16203 return SDValue();
16204 ++I;
16205 }
16206 BaseUpdates.resize(NumValidUpd);
16207
16208 // Try to fold with other users. Non-constant updates are considered
16209 // first, and constant updates are sorted to not break a sequence of
16210 // strided accesses (if there is any).
16211 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16212 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16213 return LHS.ConstInc < RHS.ConstInc;
16214 });
16215 for (BaseUpdateUser &User : BaseUpdates) {
16216 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16217 return SDValue();
16218 }
16219 return SDValue();
16220}
16221
16222 static SDValue PerformVLDCombine(SDNode *N,
16223 TargetLowering::DAGCombinerInfo &DCI) {
16224 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16225 return SDValue();
16226
16227 return CombineBaseUpdate(N, DCI);
16228}
16229
16230 static SDValue PerformMVEVLDCombine(SDNode *N,
16231 TargetLowering::DAGCombinerInfo &DCI) {
16232 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16233 return SDValue();
16234
16235 SelectionDAG &DAG = DCI.DAG;
16236 SDValue Addr = N->getOperand(2);
16237 MemSDNode *MemN = cast<MemSDNode>(N);
16238 SDLoc dl(N);
16239
16240 // For the stores, where there are multiple intrinsics we only actually want
16241 // to post-inc the last of them.
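// (The vst2q lowering emits two of these intrinsic calls (stages 0 and 1) and
// vst4q emits four; the post-increment is attached to the final stage only.)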
16242 unsigned IntNo = N->getConstantOperandVal(1);
16243 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16244 return SDValue();
16245 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16246 return SDValue();
16247
16248 // Search for a use of the address operand that is an increment.
16249 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16250 UE = Addr.getNode()->use_end();
16251 UI != UE; ++UI) {
16252 SDNode *User = *UI;
16253 if (User->getOpcode() != ISD::ADD ||
16254 UI.getUse().getResNo() != Addr.getResNo())
16255 continue;
16256
16257 // Check that the add is independent of the load/store. Otherwise, folding
16258 // it would create a cycle. We can avoid searching through Addr as it's a
16259 // predecessor to both.
16260 SmallPtrSet<const SDNode *, 32> Visited;
16261 SmallVector<const SDNode *, 16> Worklist;
16262 Visited.insert(Addr.getNode());
16263 Worklist.push_back(N);
16264 Worklist.push_back(User);
16265 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16266 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16267 continue;
16268
16269 // Find the new opcode for the updating load/store.
16270 bool isLoadOp = true;
16271 unsigned NewOpc = 0;
16272 unsigned NumVecs = 0;
16273 switch (IntNo) {
16274 default:
16275 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16276 case Intrinsic::arm_mve_vld2q:
16277 NewOpc = ARMISD::VLD2_UPD;
16278 NumVecs = 2;
16279 break;
16280 case Intrinsic::arm_mve_vld4q:
16281 NewOpc = ARMISD::VLD4_UPD;
16282 NumVecs = 4;
16283 break;
16284 case Intrinsic::arm_mve_vst2q:
16285 NewOpc = ARMISD::VST2_UPD;
16286 NumVecs = 2;
16287 isLoadOp = false;
16288 break;
16289 case Intrinsic::arm_mve_vst4q:
16290 NewOpc = ARMISD::VST4_UPD;
16291 NumVecs = 4;
16292 isLoadOp = false;
16293 break;
16294 }
16295
16296 // Find the size of memory referenced by the load/store.
16297 EVT VecTy;
16298 if (isLoadOp) {
16299 VecTy = N->getValueType(0);
16300 } else {
16301 VecTy = N->getOperand(3).getValueType();
16302 }
16303
16304 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16305
16306 // If the increment is a constant, it must match the memory ref size.
16307 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16308 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16309 if (!CInc || CInc->getZExtValue() != NumBytes)
16310 continue;
16311
16312 // Create the new updating load/store node.
16313 // First, create an SDVTList for the new updating node's results.
16314 EVT Tys[6];
16315 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16316 unsigned n;
16317 for (n = 0; n < NumResultVecs; ++n)
16318 Tys[n] = VecTy;
16319 Tys[n++] = MVT::i32;
16320 Tys[n] = MVT::Other;
16321 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16322
16323 // Then, gather the new node's operands.
16324 SmallVector<SDValue, 8> Ops;
16325 Ops.push_back(N->getOperand(0)); // incoming chain
16326 Ops.push_back(N->getOperand(2)); // ptr
16327 Ops.push_back(Inc);
16328
16329 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16330 Ops.push_back(N->getOperand(i));
16331
16332 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16333 MemN->getMemOperand());
16334
16335 // Update the uses.
16336 SmallVector<SDValue, 5> NewResults;
16337 for (unsigned i = 0; i < NumResultVecs; ++i)
16338 NewResults.push_back(SDValue(UpdN.getNode(), i));
16339
16340 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16341 DCI.CombineTo(N, NewResults);
16342 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16343
16344 break;
16345 }
16346
16347 return SDValue();
16348}
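// Illustrative MVE example (values chosen for exposition): a v4i32 vld2q
// whose address is later incremented by "add r0, r0, #32" has
// NumBytes = 2 * 16 = 32, so the intrinsic and the ADD collapse into one
// ARMISD::VLD2_UPD node whose extra i32 result replaces the old ADD.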
16349
16350/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16351/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16352/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16353/// return true.
16354 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16355 SelectionDAG &DAG = DCI.DAG;
16356 EVT VT = N->getValueType(0);
16357 // vldN-dup instructions only support 64-bit vectors for N > 1.
16358 if (!VT.is64BitVector())
16359 return false;
16360
16361 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16362 SDNode *VLD = N->getOperand(0).getNode();
16363 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16364 return false;
16365 unsigned NumVecs = 0;
16366 unsigned NewOpc = 0;
16367 unsigned IntNo = VLD->getConstantOperandVal(1);
16368 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16369 NumVecs = 2;
16370 NewOpc = ARMISD::VLD2DUP;
16371 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16372 NumVecs = 3;
16373 NewOpc = ARMISD::VLD3DUP;
16374 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16375 NumVecs = 4;
16376 NewOpc = ARMISD::VLD4DUP;
16377 } else {
16378 return false;
16379 }
16380
16381 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16382 // numbers match the load.
16383 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16384 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16385 UI != UE; ++UI) {
16386 // Ignore uses of the chain result.
16387 if (UI.getUse().getResNo() == NumVecs)
16388 continue;
16389 SDNode *User = *UI;
16390 if (User->getOpcode() != ARMISD::VDUPLANE ||
16391 VLDLaneNo != User->getConstantOperandVal(1))
16392 return false;
16393 }
16394
16395 // Create the vldN-dup node.
16396 EVT Tys[5];
16397 unsigned n;
16398 for (n = 0; n < NumVecs; ++n)
16399 Tys[n] = VT;
16400 Tys[n] = MVT::Other;
16401 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16402 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16403 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16404 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16405 Ops, VLDMemInt->getMemoryVT(),
16406 VLDMemInt->getMemOperand());
16407
16408 // Update the uses.
16409 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16410 UI != UE; ++UI) {
16411 unsigned ResNo = UI.getUse().getResNo();
16412 // Ignore uses of the chain result.
16413 if (ResNo == NumVecs)
16414 continue;
16415 SDNode *User = *UI;
16416 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16417 }
16418
16419 // Now the vldN-lane intrinsic is dead except for its chain result.
16420 // Update uses of the chain.
16421 std::vector<SDValue> VLDDupResults;
16422 for (unsigned n = 0; n < NumVecs; ++n)
16423 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16424 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16425 DCI.CombineTo(VLD, VLDDupResults);
16426
16427 return true;
16428}
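// For example (illustrative only): if both vector results of a vld2lane from
// lane 1 are used solely by VDUPLANE nodes selecting lane 1, the group is
// rewritten as a single ARMISD::VLD2DUP, i.e. an all-lanes load of the form
// "vld2.16 {d16[], d17[]}, [r0]".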
16429
16430/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16431/// ARMISD::VDUPLANE.
16432 static SDValue PerformVDUPLANECombine(SDNode *N,
16433 TargetLowering::DAGCombinerInfo &DCI,
16434 const ARMSubtarget *Subtarget) {
16435 SDValue Op = N->getOperand(0);
16436 EVT VT = N->getValueType(0);
16437
16438 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16439 if (Subtarget->hasMVEIntegerOps()) {
16440 EVT ExtractVT = VT.getVectorElementType();
16441 // We need to ensure we are creating a legal type.
16442 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16443 ExtractVT = MVT::i32;
16444 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16445 N->getOperand(0), N->getOperand(1));
16446 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16447 }
16448
16449 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16450 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16451 if (CombineVLDDUP(N, DCI))
16452 return SDValue(N, 0);
16453
16454 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16455 // redundant. Ignore bit_converts for now; element sizes are checked below.
16456 while (Op.getOpcode() == ISD::BITCAST)
16457 Op = Op.getOperand(0);
16458 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16459 return SDValue();
16460
16461 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16462 unsigned EltSize = Op.getScalarValueSizeInBits();
16463 // The canonical VMOV for a zero vector uses a 32-bit element size.
16464 unsigned Imm = Op.getConstantOperandVal(0);
16465 unsigned EltBits;
16466 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16467 EltSize = 8;
16468 if (EltSize > VT.getScalarSizeInBits())
16469 return SDValue();
16470
16471 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16472}
16473
16474/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16475 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16476 const ARMSubtarget *Subtarget) {
16477 SDValue Op = N->getOperand(0);
16478 SDLoc dl(N);
16479
16480 if (Subtarget->hasMVEIntegerOps()) {
16481 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16482 // need to come from a GPR.
16483 if (Op.getValueType() == MVT::f32)
16484 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16485 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16486 else if (Op.getValueType() == MVT::f16)
16487 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16488 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16489 }
16490
16491 if (!Subtarget->hasNEON())
16492 return SDValue();
16493
16494 // Match VDUP(LOAD) -> VLD1DUP.
16495 // We match this pattern here rather than waiting for isel because the
16496 // transform is only legal for unindexed loads.
16497 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16498 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16499 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16500 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16501 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16502 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16503 SDValue VLDDup =
16504 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16505 LD->getMemoryVT(), LD->getMemOperand());
16506 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16507 return VLDDup;
16508 }
16509
16510 return SDValue();
16511}
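// Illustrative example of the VDUP(LOAD) fold: a scalar i32 load feeding a
// vdup of a 128-bit vector becomes a single all-lanes load such as
//   vld1.32 {d0[], d1[]}, [r0]
// selected from the ARMISD::VLD1DUP node built above (unindexed loads only,
// hence the isUnindexed() check).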
16512
16513 static SDValue PerformLOADCombine(SDNode *N,
16514 TargetLowering::DAGCombinerInfo &DCI,
16515 const ARMSubtarget *Subtarget) {
16516 EVT VT = N->getValueType(0);
16517
16518 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16519 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16520 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16521 return CombineBaseUpdate(N, DCI);
16522
16523 return SDValue();
16524}
16525
16526// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16527// pack all of the elements in one place. Next, store to memory in fewer
16528// chunks.
16529 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16530 SelectionDAG &DAG) {
16531 SDValue StVal = St->getValue();
16532 EVT VT = StVal.getValueType();
16533 if (!St->isTruncatingStore() || !VT.isVector())
16534 return SDValue();
16535 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16536 EVT StVT = St->getMemoryVT();
16537 unsigned NumElems = VT.getVectorNumElements();
16538 assert(StVT != VT && "Cannot truncate to the same type");
16539 unsigned FromEltSz = VT.getScalarSizeInBits();
16540 unsigned ToEltSz = StVT.getScalarSizeInBits();
16541
16542 // From/To sizes and ElemCount must be powers of two.
16543 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16544 return SDValue();
16545
16546 // We are going to use the original vector elt for storing.
16547 // Accumulated smaller vector elements must be a multiple of the store size.
16548 if (0 != (NumElems * FromEltSz) % ToEltSz)
16549 return SDValue();
16550
16551 unsigned SizeRatio = FromEltSz / ToEltSz;
16552 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16553
16554 // Create a type on which we perform the shuffle.
16555 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16556 NumElems * SizeRatio);
16557 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16558
16559 SDLoc DL(St);
16560 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16561 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16562 for (unsigned i = 0; i < NumElems; ++i)
16563 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16564 : i * SizeRatio;
16565
16566 // Can't shuffle using an illegal type.
16567 if (!TLI.isTypeLegal(WideVecVT))
16568 return SDValue();
16569
16570 SDValue Shuff = DAG.getVectorShuffle(
16571 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16572 // At this point all of the data is stored at the bottom of the
16573 // register. We now need to save it to mem.
16574
16575 // Find the largest store unit
16576 MVT StoreType = MVT::i8;
16577 for (MVT Tp : MVT::integer_valuetypes()) {
16578 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16579 StoreType = Tp;
16580 }
16581 // Didn't find a legal store type.
16582 if (!TLI.isTypeLegal(StoreType))
16583 return SDValue();
16584
16585 // Bitcast the original vector into a vector of store-size units
16586 EVT StoreVecVT =
16587 EVT::getVectorVT(*DAG.getContext(), StoreType,
16588 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16589 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16590 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16591 SmallVector<SDValue, 8> Chains;
16592 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16593 TLI.getPointerTy(DAG.getDataLayout()));
16594 SDValue BasePtr = St->getBasePtr();
16595
16596 // Perform one or more big stores into memory.
16597 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16598 for (unsigned I = 0; I < E; I++) {
16599 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16600 ShuffWide, DAG.getIntPtrConstant(I, DL));
16601 SDValue Ch =
16602 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16603 St->getAlign(), St->getMemOperand()->getFlags());
16604 BasePtr =
16605 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16606 Chains.push_back(Ch);
16607 }
16608 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16609}
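// Worked example (illustrative): a truncating store of v4i32 to v4i8 is
// bitcast to v16i8, the bytes 0, 4, 8 and 12 (little-endian) are shuffled
// into the low lanes, and the packed result is written with a single i32
// store instead of four separate byte stores.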
16610
16611// Try taking a single vector store from an fpround (which would otherwise turn
16612// into an expensive buildvector) and splitting it into a series of narrowing
16613// stores.
16614 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16615 SelectionDAG &DAG) {
16616 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16617 return SDValue();
16618 SDValue Trunc = St->getValue();
16619 if (Trunc->getOpcode() != ISD::FP_ROUND)
16620 return SDValue();
16621 EVT FromVT = Trunc->getOperand(0).getValueType();
16622 EVT ToVT = Trunc.getValueType();
16623 if (!ToVT.isVector())
16624 return SDValue();
16626 EVT ToEltVT = ToVT.getVectorElementType();
16627 EVT FromEltVT = FromVT.getVectorElementType();
16628
16629 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16630 return SDValue();
16631
16632 unsigned NumElements = 4;
16633 if (FromVT.getVectorNumElements() % NumElements != 0)
16634 return SDValue();
16635
16636 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16637 // use the VMOVN over splitting the store. We are looking for patterns of:
16638 // !rev: 0 N 1 N+1 2 N+2 ...
16639 // rev: N 0 N+1 1 N+2 2 ...
16640 // The shuffle may either be a single source (in which case N = NumElts/2) or
16641 // two inputs extended with concat to the same size (in which case N =
16642 // NumElts).
16643 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16644 ArrayRef<int> M = SVN->getMask();
16645 unsigned NumElts = ToVT.getVectorNumElements();
16646 if (SVN->getOperand(1).isUndef())
16647 NumElts /= 2;
16648
16649 unsigned Off0 = Rev ? NumElts : 0;
16650 unsigned Off1 = Rev ? 0 : NumElts;
16651
16652 for (unsigned I = 0; I < NumElts; I += 2) {
16653 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16654 return false;
16655 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16656 return false;
16657 }
16658
16659 return true;
16660 };
16661
16662 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16663 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16664 return SDValue();
16665
16666 LLVMContext &C = *DAG.getContext();
16667 SDLoc DL(St);
16668 // Details about the old store
16669 SDValue Ch = St->getChain();
16670 SDValue BasePtr = St->getBasePtr();
16671 Align Alignment = St->getOriginalAlign();
16672 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16673 AAMDNodes AAInfo = St->getAAInfo();
16674
16675 // We split the store into slices of NumElements. fp16 trunc stores are
16676 // converted with a vcvt and then stored as truncating integer stores.
16677 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16678 EVT NewToVT = EVT::getVectorVT(
16679 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16680
16681 SmallVector<SDValue, 4> Stores;
16682 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16683 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16684 SDValue NewPtr =
16685 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16686
16687 SDValue Extract =
16688 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16689 DAG.getConstant(i * NumElements, DL, MVT::i32));
16690
16691 SDValue FPTrunc =
16692 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16693 Extract, DAG.getConstant(0, DL, MVT::i32));
16694 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16695
16696 SDValue Store = DAG.getTruncStore(
16697 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16698 NewToVT, Alignment, MMOFlags, AAInfo);
16699 Stores.push_back(Store);
16700 }
16701 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16702}
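// Illustrative example: an fptrunc of v8f32 stored as v8f16 is split into two
// v4f32 halves; each half is narrowed into the bottom lanes of a VCVTN and
// then written with a truncating v4i32 -> v4i16 store at the matching offset,
// avoiding an expensive BUILD_VECTOR of the converted elements.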
16703
16704// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16705// into an expensive buildvector) and splitting it into a series of narrowing
16706// stores.
16707 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16708 SelectionDAG &DAG) {
16709 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16710 return SDValue();
16711 SDValue Trunc = St->getValue();
16712 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16713 return SDValue();
16714 EVT FromVT = Trunc->getOperand(0).getValueType();
16715 EVT ToVT = Trunc.getValueType();
16716
16717 LLVMContext &C = *DAG.getContext();
16718 SDLoc DL(St);
16719 // Details about the old store
16720 SDValue Ch = St->getChain();
16721 SDValue BasePtr = St->getBasePtr();
16722 Align Alignment = St->getOriginalAlign();
16723 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16724 AAMDNodes AAInfo = St->getAAInfo();
16725
16726 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16727 FromVT.getVectorNumElements());
16728
16729 SmallVector<SDValue, 4> Stores;
16730 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16731 unsigned NewOffset =
16732 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16733 SDValue NewPtr =
16734 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16735
16736 SDValue Extract = Trunc.getOperand(i);
16737 SDValue Store = DAG.getTruncStore(
16738 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16739 NewToVT, Alignment, MMOFlags, AAInfo);
16740 Stores.push_back(Store);
16741 }
16742 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16743}
16744
16745// Given a floating point store from an extracted vector, with an integer
16746// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16747 // help reduce fp register pressure; it avoids the fp extract and allows the
16748 // use of more integer post-inc stores, which are not available with vstr.
16749 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16750 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16751 return SDValue();
16752 SDValue Extract = St->getValue();
16753 EVT VT = Extract.getValueType();
16754 // For now only uses f16. This may be useful for f32 too, but that will
16755 // be bitcast(extract), not the VGETLANEu we currently check here.
16756 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16757 return SDValue();
16758
16759 SDNode *GetLane =
16760 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16761 {Extract.getOperand(0), Extract.getOperand(1)});
16762 if (!GetLane)
16763 return SDValue();
16764
16765 LLVMContext &C = *DAG.getContext();
16766 SDLoc DL(St);
16767 // Create a new integer store to replace the existing floating point version.
16768 SDValue Ch = St->getChain();
16769 SDValue BasePtr = St->getBasePtr();
16770 Align Alignment = St->getOriginalAlign();
16771 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16772 AAMDNodes AAInfo = St->getAAInfo();
16773 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16774 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16775 St->getPointerInfo(), NewToVT, Alignment,
16776 MMOFlags, AAInfo);
16777
16778 return Store;
16779}
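// Illustrative example (hypothetical values): given
//   %i = VGETLANEu %v, lane      ; already present in the DAG
//   %f = extractelement %v, lane ; f16
//   store half %f, ptr %p
// the f16 store is replaced by a truncating i32 -> i16 integer store of the
// existing VGETLANEu result, keeping the value out of the fp register file.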
16780
16781/// PerformSTORECombine - Target-specific dag combine xforms for
16782/// ISD::STORE.
16783 static SDValue PerformSTORECombine(SDNode *N,
16784 TargetLowering::DAGCombinerInfo &DCI,
16785 const ARMSubtarget *Subtarget) {
16786 StoreSDNode *St = cast<StoreSDNode>(N);
16787 if (St->isVolatile())
16788 return SDValue();
16789 SDValue StVal = St->getValue();
16790 EVT VT = StVal.getValueType();
16791
16792 if (Subtarget->hasNEON())
16793 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16794 return Store;
16795
16796 if (Subtarget->hasMVEFloatOps())
16797 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16798 return NewToken;
16799
16800 if (Subtarget->hasMVEIntegerOps()) {
16801 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16802 return NewChain;
16803 if (SDValue NewToken =
16804 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16805 return NewToken;
16806 }
16807
16808 if (!ISD::isNormalStore(St))
16809 return SDValue();
16810
16811 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16812 // ARM stores of arguments in the same cache line.
16813 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16814 StVal.getNode()->hasOneUse()) {
16815 SelectionDAG &DAG = DCI.DAG;
16816 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16817 SDLoc DL(St);
16818 SDValue BasePtr = St->getBasePtr();
16819 SDValue NewST1 = DAG.getStore(
16820 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16821 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16822 St->getMemOperand()->getFlags());
16823
16824 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16825 DAG.getConstant(4, DL, MVT::i32));
16826 return DAG.getStore(NewST1.getValue(0), DL,
16827 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16828 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16829 St->getOriginalAlign(),
16830 St->getMemOperand()->getFlags());
16831 }
16832
16833 if (StVal.getValueType() == MVT::i64 &&
16834 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16835
16836 // Bitcast an i64 store extracted from a vector to f64.
16837 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16838 SelectionDAG &DAG = DCI.DAG;
16839 SDLoc dl(StVal);
16840 SDValue IntVec = StVal.getOperand(0);
16841 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16842 IntVec.getValueType().getVectorNumElements());
16843 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16844 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16845 Vec, StVal.getOperand(1));
16846 dl = SDLoc(N);
16847 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16848 // Make the DAGCombiner fold the bitcasts.
16849 DCI.AddToWorklist(Vec.getNode());
16850 DCI.AddToWorklist(ExtElt.getNode());
16851 DCI.AddToWorklist(V.getNode());
16852 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16853 St->getPointerInfo(), St->getAlign(),
16854 St->getMemOperand()->getFlags(), St->getAAInfo());
16855 }
16856
16857 // If this is a legal vector store, try to combine it into a VST1_UPD.
16858 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16859 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16860 return CombineBaseUpdate(N, DCI);
16861
16862 return SDValue();
16863}
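// Illustrative example of the VMOVDRR split above: storing the result of
// VMOVDRR(r0, r1) becomes two plain i32 stores, "str r0, [rp]" and
// "str r1, [rp, #4]" (operands swapped on big-endian), so no NEON store is
// mixed with the integer argument stores on the same cache line.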
16864
16865/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16866/// can replace combinations of VMUL and VCVT (floating-point to integer)
16867/// when the VMUL has a constant operand that is a power of 2.
16868///
16869/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16870/// vmul.f32 d16, d17, d16
16871/// vcvt.s32.f32 d16, d16
16872/// becomes:
16873/// vcvt.s32.f32 d16, d16, #3
16874 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16875 const ARMSubtarget *Subtarget) {
16876 if (!Subtarget->hasNEON())
16877 return SDValue();
16878
16879 SDValue Op = N->getOperand(0);
16880 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16881 Op.getOpcode() != ISD::FMUL)
16882 return SDValue();
16883
16884 SDValue ConstVec = Op->getOperand(1);
16885 if (!isa<BuildVectorSDNode>(ConstVec))
16886 return SDValue();
16887
16888 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16889 uint32_t FloatBits = FloatTy.getSizeInBits();
16890 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16891 uint32_t IntBits = IntTy.getSizeInBits();
16892 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16893 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16894 // These instructions only exist converting from f32 to i32. We can handle
16895 // smaller integers by generating an extra truncate, but larger ones would
16896 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16897 // these instructions only support v2i32/v4i32 types.
16898 return SDValue();
16899 }
16900
16901 BitVector UndefElements;
16902 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16903 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16904 if (C == -1 || C == 0 || C > 32)
16905 return SDValue();
16906
16907 SDLoc dl(N);
16908 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16909 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16910 Intrinsic::arm_neon_vcvtfp2fxu;
16911 SDValue FixConv = DAG.getNode(
16912 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16913 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16914 DAG.getConstant(C, dl, MVT::i32));
16915
16916 if (IntBits < FloatBits)
16917 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16918
16919 return FixConv;
16920}
16921
16922 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16923 const ARMSubtarget *Subtarget) {
16924 if (!Subtarget->hasMVEFloatOps())
16925 return SDValue();
16926
16927 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16928 // The second form can be more easily turned into a predicated vadd, and
16929 // possibly combined into a fma to become a predicated vfma.
16930 SDValue Op0 = N->getOperand(0);
16931 SDValue Op1 = N->getOperand(1);
16932 EVT VT = N->getValueType(0);
16933 SDLoc DL(N);
16934
16935 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16936 // which these VMOV's represent.
16937 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16938 if (Op.getOpcode() != ISD::BITCAST ||
16939 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16940 return false;
16941 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16942 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16943 return true;
16944 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16945 return true;
16946 return false;
16947 };
16948
16949 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16950 std::swap(Op0, Op1);
16951
16952 if (Op1.getOpcode() != ISD::VSELECT)
16953 return SDValue();
16954
16955 SDNodeFlags FaddFlags = N->getFlags();
16956 bool NSZ = FaddFlags.hasNoSignedZeros();
16957 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16958 return SDValue();
16959
16960 SDValue FAdd =
16961 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16962 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16963}
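// Illustrative example: with a predicate mask m,
//   fadd x, (vselect m, y, splat -0.0)
// becomes
//   vselect m, (fadd x, y), x
// which can then be selected as a predicated MVE vadd and possibly fused into
// a predicated vfma, as noted above.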
16964
16965 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16966 SDValue LHS = N->getOperand(0);
16967 SDValue RHS = N->getOperand(1);
16968 EVT VT = N->getValueType(0);
16969 SDLoc DL(N);
16970
16971 if (!N->getFlags().hasAllowReassociation())
16972 return SDValue();
16973
16974 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
16975 auto ReassocComplex = [&](SDValue A, SDValue B) {
16976 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
16977 return SDValue();
16978 unsigned Opc = A.getConstantOperandVal(0);
16979 if (Opc != Intrinsic::arm_mve_vcmlaq)
16980 return SDValue();
16981 SDValue VCMLA = DAG.getNode(
16982 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
16983 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
16984 A.getOperand(3), A.getOperand(4));
16985 VCMLA->setFlags(A->getFlags());
16986 return VCMLA;
16987 };
16988 if (SDValue R = ReassocComplex(LHS, RHS))
16989 return R;
16990 if (SDValue R = ReassocComplex(RHS, LHS))
16991 return R;
16992
16993 return SDValue();
16994}
16995
16996 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
16997 const ARMSubtarget *Subtarget) {
16998 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
16999 return S;
17000 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17001 return S;
17002 return SDValue();
17003}
17004
17005/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17006/// can replace combinations of VCVT (integer to floating-point) and VDIV
17007/// when the VDIV has a constant operand that is a power of 2.
17008///
17009/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
17010/// vcvt.f32.s32 d16, d16
17011/// vdiv.f32 d16, d17, d16
17012/// becomes:
17013/// vcvt.f32.s32 d16, d16, #3
17014 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
17015 const ARMSubtarget *Subtarget) {
17016 if (!Subtarget->hasNEON())
17017 return SDValue();
17018
17019 SDValue Op = N->getOperand(0);
17020 unsigned OpOpcode = Op.getNode()->getOpcode();
17021 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17022 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17023 return SDValue();
17024
17025 SDValue ConstVec = N->getOperand(1);
17026 if (!isa<BuildVectorSDNode>(ConstVec))
17027 return SDValue();
17028
17029 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17030 uint32_t FloatBits = FloatTy.getSizeInBits();
17031 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17032 uint32_t IntBits = IntTy.getSizeInBits();
17033 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17034 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17035 // These instructions only exist converting from i32 to f32. We can handle
17036 // smaller integers by generating an extra extend, but larger ones would
17037 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17038 // these instructions only support v2i32/v4i32 types.
17039 return SDValue();
17040 }
17041
17042 BitVector UndefElements;
17043 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
17044 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
17045 if (C == -1 || C == 0 || C > 32)
17046 return SDValue();
17047
17048 SDLoc dl(N);
17049 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17050 SDValue ConvInput = Op.getOperand(0);
17051 if (IntBits < FloatBits)
17052 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
17053 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
17054 ConvInput);
17055
17056 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
17057 Intrinsic::arm_neon_vcvtfxu2fp;
17058 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
17059 Op.getValueType(),
17060 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
17061 ConvInput, DAG.getConstant(C, dl, MVT::i32));
17062}
17063
17064 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17065 const ARMSubtarget *ST) {
17066 if (!ST->hasMVEIntegerOps())
17067 return SDValue();
17068
17069 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17070 EVT ResVT = N->getValueType(0);
17071 SDValue N0 = N->getOperand(0);
17072 SDLoc dl(N);
17073
17074 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17075 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17076 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17077 N0.getValueType() == MVT::v16i8)) {
17078 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17079 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17080 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17081 }
17082
17083 // We are looking for something that will have illegal types if left alone,
17084 // but that we can convert to a single instruction under MVE. For example
17085 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17086 // or
17087 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17088
17089 // The legal cases are:
17090 // VADDV u/s 8/16/32
17091 // VMLAV u/s 8/16/32
17092 // VADDLV u/s 32
17093 // VMLALV u/s 16/32
17094
17095 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17096 // extend it and use v4i32 instead.
17097 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17098 EVT AVT = A.getValueType();
17099 return any_of(ExtTypes, [&](MVT Ty) {
17100 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17101 AVT.bitsLE(Ty);
17102 });
17103 };
17104 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17105 EVT AVT = A.getValueType();
17106 if (!AVT.is128BitVector())
17107 A = DAG.getNode(ExtendCode, dl,
17108 AVT.changeVectorElementType(MVT::getIntegerVT(
17109 128 / AVT.getVectorMinNumElements())),
17110 A);
17111 return A;
17112 };
17113 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17114 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17115 return SDValue();
17116 SDValue A = N0->getOperand(0);
17117 if (ExtTypeMatches(A, ExtTypes))
17118 return ExtendIfNeeded(A, ExtendCode);
17119 return SDValue();
17120 };
17121 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17122 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17123 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17124 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17125 return SDValue();
17126 Mask = N0->getOperand(0);
17127 SDValue Ext = N0->getOperand(1);
17128 if (Ext->getOpcode() != ExtendCode)
17129 return SDValue();
17130 SDValue A = Ext->getOperand(0);
17131 if (ExtTypeMatches(A, ExtTypes))
17132 return ExtendIfNeeded(A, ExtendCode);
17133 return SDValue();
17134 };
17135 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17136 SDValue &A, SDValue &B) {
17137 // For a vmla we are trying to match a larger pattern:
17138 // ExtA = sext/zext A
17139 // ExtB = sext/zext B
17140 // Mul = mul ExtA, ExtB
17141 // vecreduce.add Mul
17142 // There might also be an extra extend between the mul and the addreduce, so
17143 // long as the bitwidth is high enough to make them equivalent (for example
17144 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17145 if (ResVT != RetTy)
17146 return false;
17147 SDValue Mul = N0;
17148 if (Mul->getOpcode() == ExtendCode &&
17149 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17150 ResVT.getScalarSizeInBits())
17151 Mul = Mul->getOperand(0);
17152 if (Mul->getOpcode() != ISD::MUL)
17153 return false;
17154 SDValue ExtA = Mul->getOperand(0);
17155 SDValue ExtB = Mul->getOperand(1);
17156 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17157 return false;
17158 A = ExtA->getOperand(0);
17159 B = ExtB->getOperand(0);
17160 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17161 A = ExtendIfNeeded(A, ExtendCode);
17162 B = ExtendIfNeeded(B, ExtendCode);
17163 return true;
17164 }
17165 return false;
17166 };
17167 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17168 SDValue &A, SDValue &B, SDValue &Mask) {
17169 // Same as the pattern above with a select for the zero predicated lanes
17170 // ExtA = sext/zext A
17171 // ExtB = sext/zext B
17172 // Mul = mul ExtA, ExtB
17173 // N0 = select Mask, Mul, 0
17174 // vecreduce.add N0
17175 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17176 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17177 return false;
17178 Mask = N0->getOperand(0);
17179 SDValue Mul = N0->getOperand(1);
17180 if (Mul->getOpcode() == ExtendCode &&
17181 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17182 ResVT.getScalarSizeInBits())
17183 Mul = Mul->getOperand(0);
17184 if (Mul->getOpcode() != ISD::MUL)
17185 return false;
17186 SDValue ExtA = Mul->getOperand(0);
17187 SDValue ExtB = Mul->getOperand(1);
17188 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17189 return false;
17190 A = ExtA->getOperand(0);
17191 B = ExtB->getOperand(0);
17192 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17193 A = ExtendIfNeeded(A, ExtendCode);
17194 B = ExtendIfNeeded(B, ExtendCode);
17195 return true;
17196 }
17197 return false;
17198 };
17199 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17200 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17201 // reductions. The operands are extended with MVEEXT, but as they are
17202 // reductions the lane orders do not matter. MVEEXT may be combined with
17203 // loads to produce two extending loads, or else they will be expanded to
17204 // VREV/VMOVL.
17205 EVT VT = Ops[0].getValueType();
17206 if (VT == MVT::v16i8) {
17207 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17208 "Unexpected illegal long reduction opcode");
17209 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17210
17211 SDValue Ext0 =
17212 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17213 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17214 SDValue Ext1 =
17215 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17216 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17217
17218 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17219 Ext0, Ext1);
17220 SDValue MLA1 =
17221 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17222 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17223 Ext0.getValue(1), Ext1.getValue(1));
17224 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17225 }
17226 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17227 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17228 SDValue(Node.getNode(), 1));
17229 };
17230
17231 SDValue A, B;
17232 SDValue Mask;
17233 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17234 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17235 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17236 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17237 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17238 A, B))
17239 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17240 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17241 A, B))
17242 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17243 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17244 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17245 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17246 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17247 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17248 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17249
17250 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17251 Mask))
17252 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17253 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17254 Mask))
17255 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17256 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17257 Mask))
17258 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17259 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17260 Mask))
17261 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17262 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17263 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17264 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17265 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17266 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17267 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17268
17269 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17270 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17271 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17272 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17273 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17274 return Create64bitNode(ARMISD::VADDLVs, {A});
17275 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17276 return Create64bitNode(ARMISD::VADDLVu, {A});
17277 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17278 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17279 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17280 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17281 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17282 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17283
17284 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17285 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17286 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17287 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17288 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17289 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17290 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17291 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17292 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17293 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17294 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17295 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17296 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17297 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17298
17299 // Some complications. We can get a case where the two inputs of the mul are
17300 // the same; then the output sext will have been helpfully converted to a
17301 // zext. Turn it back.
17302 SDValue Op = N0;
17303 if (Op->getOpcode() == ISD::VSELECT)
17304 Op = Op->getOperand(1);
17305 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17306 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17307 SDValue Mul = Op->getOperand(0);
17308 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17309 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17310 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17311 if (Op != N0)
17312 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17313 N0->getOperand(0), Ext, N0->getOperand(2));
17314 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17315 }
17316 }
17317
17318 return SDValue();
17319}
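// Illustrative example of the matching above: for
//   vecreduce.add (mul (zext <16 x i8> %a to <16 x i32>),
//                      (zext <16 x i8> %b to <16 x i32>))
// IsVMLAV succeeds with ZERO_EXTEND over v16i8, so the whole reduction is
// emitted as a single ARMISD::VMLAVu (a VMLADAV.u8) rather than a legalized
// v16i32 multiply followed by a chain of additions.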
17320
17321// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17322// the lanes are used. Due to the reduction being commutative the shuffle can be
17323// removed.
17324 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17325 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17326 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17327 if (!Shuf || !Shuf->getOperand(1).isUndef())
17328 return SDValue();
17329
17330 // Check all elements are used once in the mask.
17331 ArrayRef<int> Mask = Shuf->getMask();
17332 APInt SetElts(Mask.size(), 0);
17333 for (int E : Mask) {
17334 if (E < 0 || E >= (int)Mask.size())
17335 return SDValue();
17336 SetElts.setBit(E);
17337 }
17338 if (!SetElts.isAllOnes())
17339 return SDValue();
17340
17341 if (N->getNumOperands() != VecOp + 1) {
17342 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17343 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17344 return SDValue();
17345 }
17346
17347 SmallVector<SDValue> Ops;
17348 for (SDValue Op : N->ops()) {
17349 if (Op.getValueType().isVector())
17350 Ops.push_back(Op.getOperand(0));
17351 else
17352 Ops.push_back(Op);
17353 }
17354 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17355}
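// Illustrative example: vaddv(shuffle <3,2,1,0> %a, undef) adds the same four
// lanes as vaddv(%a), so the shuffle is dropped; the mask check above only
// requires that every lane index appears exactly once.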
17356
17357 static SDValue PerformVMOVNCombine(SDNode *N,
17358 TargetLowering::DAGCombinerInfo &DCI) {
17359 SDValue Op0 = N->getOperand(0);
17360 SDValue Op1 = N->getOperand(1);
17361 unsigned IsTop = N->getConstantOperandVal(2);
17362
17363 // VMOVNT a undef -> a
17364 // VMOVNB a undef -> a
17365 // VMOVNB undef a -> a
17366 if (Op1->isUndef())
17367 return Op0;
17368 if (Op0->isUndef() && !IsTop)
17369 return Op1;
17370
17371 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17372 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17373 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17374 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17375 Op1->getConstantOperandVal(2) == 0)
17376 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17377 Op0, Op1->getOperand(1), N->getOperand(2));
17378
17379 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17380 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17381 // into the top or bottom lanes.
17382 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17383 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17384 APInt Op0DemandedElts =
17385 IsTop ? Op1DemandedElts
17386 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17387
17388 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17389 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17390 return SDValue(N, 0);
17391 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17392 return SDValue(N, 0);
17393
17394 return SDValue();
17395}
17396
17397 static SDValue PerformVQMOVNCombine(SDNode *N,
17398 TargetLowering::DAGCombinerInfo &DCI) {
17399 SDValue Op0 = N->getOperand(0);
17400 unsigned IsTop = N->getConstantOperandVal(2);
17401
17402 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17403 APInt Op0DemandedElts =
17404 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17405 : APInt::getHighBitsSet(2, 1));
17406
17407 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17408 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17409 return SDValue(N, 0);
17410 return SDValue();
17411}
17412
17413 static SDValue PerformVQDMULHCombine(SDNode *N,
17414 TargetLowering::DAGCombinerInfo &DCI) {
17415 EVT VT = N->getValueType(0);
17416 SDValue LHS = N->getOperand(0);
17417 SDValue RHS = N->getOperand(1);
17418
17419 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17420 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17421 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17422 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17423 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17424 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17425 SDLoc DL(N);
17426 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17427 LHS.getOperand(0), RHS.getOperand(0));
17428 SDValue UndefV = LHS.getOperand(1);
17429 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17430 }
17431 return SDValue();
17432}
17433
17434 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17435 SDLoc DL(N);
17436 SDValue Op0 = N->getOperand(0);
17437 SDValue Op1 = N->getOperand(1);
17438
17439 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17440 // uses of the intrinsics.
17441 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17442 int ShiftAmt = C->getSExtValue();
17443 if (ShiftAmt == 0) {
17444 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17445 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17446 return SDValue();
17447 }
17448
17449 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17450 unsigned NewOpcode =
17451 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17452 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17453 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17454 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17455 return NewShift;
17456 }
17457 }
17458
17459 return SDValue();
17460}
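// Illustrative example: lsll(lo, hi, -8), which can arise when the long-shift
// intrinsic is given a negative constant amount, is rewritten as
// lsrl(lo, hi, 8); an amount of 0 simply forwards the two inputs unchanged.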
17461
17462/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17463 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17464 DAGCombinerInfo &DCI) const {
17465 SelectionDAG &DAG = DCI.DAG;
17466 unsigned IntNo = N->getConstantOperandVal(0);
17467 switch (IntNo) {
17468 default:
17469 // Don't do anything for most intrinsics.
17470 break;
17471
17472 // Vector shifts: check for immediate versions and lower them.
17473 // Note: This is done during DAG combining instead of DAG legalizing because
17474 // the build_vectors for 64-bit vector element shift counts are generally
17475 // not legal, and it is hard to see their values after they get legalized to
17476 // loads from a constant pool.
17477 case Intrinsic::arm_neon_vshifts:
17478 case Intrinsic::arm_neon_vshiftu:
17479 case Intrinsic::arm_neon_vrshifts:
17480 case Intrinsic::arm_neon_vrshiftu:
17481 case Intrinsic::arm_neon_vrshiftn:
17482 case Intrinsic::arm_neon_vqshifts:
17483 case Intrinsic::arm_neon_vqshiftu:
17484 case Intrinsic::arm_neon_vqshiftsu:
17485 case Intrinsic::arm_neon_vqshiftns:
17486 case Intrinsic::arm_neon_vqshiftnu:
17487 case Intrinsic::arm_neon_vqshiftnsu:
17488 case Intrinsic::arm_neon_vqrshiftns:
17489 case Intrinsic::arm_neon_vqrshiftnu:
17490 case Intrinsic::arm_neon_vqrshiftnsu: {
17491 EVT VT = N->getOperand(1).getValueType();
17492 int64_t Cnt;
17493 unsigned VShiftOpc = 0;
17494
17495 switch (IntNo) {
17496 case Intrinsic::arm_neon_vshifts:
17497 case Intrinsic::arm_neon_vshiftu:
17498 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17499 VShiftOpc = ARMISD::VSHLIMM;
17500 break;
17501 }
17502 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17503 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17504 : ARMISD::VSHRuIMM);
17505 break;
17506 }
17507 return SDValue();
17508
17509 case Intrinsic::arm_neon_vrshifts:
17510 case Intrinsic::arm_neon_vrshiftu:
17511 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17512 break;
17513 return SDValue();
17514
17515 case Intrinsic::arm_neon_vqshifts:
17516 case Intrinsic::arm_neon_vqshiftu:
17517 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17518 break;
17519 return SDValue();
17520
17521 case Intrinsic::arm_neon_vqshiftsu:
17522 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17523 break;
17524 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17525
17526 case Intrinsic::arm_neon_vrshiftn:
17527 case Intrinsic::arm_neon_vqshiftns:
17528 case Intrinsic::arm_neon_vqshiftnu:
17529 case Intrinsic::arm_neon_vqshiftnsu:
17530 case Intrinsic::arm_neon_vqrshiftns:
17531 case Intrinsic::arm_neon_vqrshiftnu:
17532 case Intrinsic::arm_neon_vqrshiftnsu:
17533 // Narrowing shifts require an immediate right shift.
17534 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17535 break;
17536 llvm_unreachable("invalid shift count for narrowing vector shift "
17537 "intrinsic");
17538
17539 default:
17540 llvm_unreachable("unhandled vector shift");
17541 }
17542
17543 switch (IntNo) {
17544 case Intrinsic::arm_neon_vshifts:
17545 case Intrinsic::arm_neon_vshiftu:
17546 // Opcode already set above.
17547 break;
17548 case Intrinsic::arm_neon_vrshifts:
17549 VShiftOpc = ARMISD::VRSHRsIMM;
17550 break;
17551 case Intrinsic::arm_neon_vrshiftu:
17552 VShiftOpc = ARMISD::VRSHRuIMM;
17553 break;
17554 case Intrinsic::arm_neon_vrshiftn:
17555 VShiftOpc = ARMISD::VRSHRNIMM;
17556 break;
17557 case Intrinsic::arm_neon_vqshifts:
17558 VShiftOpc = ARMISD::VQSHLsIMM;
17559 break;
17560 case Intrinsic::arm_neon_vqshiftu:
17561 VShiftOpc = ARMISD::VQSHLuIMM;
17562 break;
17563 case Intrinsic::arm_neon_vqshiftsu:
17564 VShiftOpc = ARMISD::VQSHLsuIMM;
17565 break;
17566 case Intrinsic::arm_neon_vqshiftns:
17567 VShiftOpc = ARMISD::VQSHRNsIMM;
17568 break;
17569 case Intrinsic::arm_neon_vqshiftnu:
17570 VShiftOpc = ARMISD::VQSHRNuIMM;
17571 break;
17572 case Intrinsic::arm_neon_vqshiftnsu:
17573 VShiftOpc = ARMISD::VQSHRNsuIMM;
17574 break;
17575 case Intrinsic::arm_neon_vqrshiftns:
17576 VShiftOpc = ARMISD::VQRSHRNsIMM;
17577 break;
17578 case Intrinsic::arm_neon_vqrshiftnu:
17579 VShiftOpc = ARMISD::VQRSHRNuIMM;
17580 break;
17581 case Intrinsic::arm_neon_vqrshiftnsu:
17582 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17583 break;
17584 }
17585
17586 SDLoc dl(N);
17587 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17588 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17589 }
17590
17591 case Intrinsic::arm_neon_vshiftins: {
17592 EVT VT = N->getOperand(1).getValueType();
17593 int64_t Cnt;
17594 unsigned VShiftOpc = 0;
17595
17596 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17597 VShiftOpc = ARMISD::VSLIIMM;
17598 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17599 VShiftOpc = ARMISD::VSRIIMM;
17600 else {
17601 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17602 }
17603
17604 SDLoc dl(N);
17605 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17606 N->getOperand(1), N->getOperand(2),
17607 DAG.getConstant(Cnt, dl, MVT::i32));
17608 }
17609
17610 case Intrinsic::arm_neon_vqrshifts:
17611 case Intrinsic::arm_neon_vqrshiftu:
17612 // No immediate versions of these to check for.
17613 break;
17614
17615 case Intrinsic::arm_mve_vqdmlah:
17616 case Intrinsic::arm_mve_vqdmlash:
17617 case Intrinsic::arm_mve_vqrdmlah:
17618 case Intrinsic::arm_mve_vqrdmlash:
17619 case Intrinsic::arm_mve_vmla_n_predicated:
17620 case Intrinsic::arm_mve_vmlas_n_predicated:
17621 case Intrinsic::arm_mve_vqdmlah_predicated:
17622 case Intrinsic::arm_mve_vqdmlash_predicated:
17623 case Intrinsic::arm_mve_vqrdmlah_predicated:
17624 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17625 // These intrinsics all take an i32 scalar operand which is narrowed to the
17626 // size of a single lane of the vector type they return. So we don't need
17627 // any bits of that operand above that point, which allows us to eliminate
17628 // uxth/sxth.
17629 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17630 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17631 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17632 return SDValue();
17633 break;
17634 }
17635
17636 case Intrinsic::arm_mve_minv:
17637 case Intrinsic::arm_mve_maxv:
17638 case Intrinsic::arm_mve_minav:
17639 case Intrinsic::arm_mve_maxav:
17640 case Intrinsic::arm_mve_minv_predicated:
17641 case Intrinsic::arm_mve_maxv_predicated:
17642 case Intrinsic::arm_mve_minav_predicated:
17643 case Intrinsic::arm_mve_maxav_predicated: {
17644 // These intrinsics all take an i32 scalar operand which is narrowed to the
17645 // size of a single lane of the vector type they take as the other input.
17646 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17647 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17648 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17649 return SDValue();
17650 break;
17651 }
17652
17653 case Intrinsic::arm_mve_addv: {
17654 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17655 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17656 bool Unsigned = N->getConstantOperandVal(2);
17657 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17658 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17659 }
17660
17661 case Intrinsic::arm_mve_addlv:
17662 case Intrinsic::arm_mve_addlv_predicated: {
17663 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17664 // which recombines the two outputs into an i64
17665 bool Unsigned = N->getConstantOperandVal(2);
17666 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17667 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17668 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17669
17670 SmallVector<SDValue, 4> Ops;
17671 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17672 if (i != 2) // skip the unsigned flag
17673 Ops.push_back(N->getOperand(i));
17674
17675 SDLoc dl(N);
17676 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17677 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17678 val.getValue(1));
17679 }
17680 }
17681
17682 return SDValue();
17683}
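// Illustrative example of the immediate-shift path above: a call to the
// arm.neon.vshifts intrinsic whose shift operand is a build_vector splat of 3
// matches isVShiftLImm and is emitted directly as ARMISD::VSHLIMM, i.e.
// "vshl.i32 d0, d0, #3", instead of leaving the splat to be legalized into a
// constant-pool load.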
17684
17685/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17686/// lowers them. As with the vector shift intrinsics, this is done during DAG
17687/// combining instead of DAG legalizing because the build_vectors for 64-bit
17688/// vector element shift counts are generally not legal, and it is hard to see
17689/// their values after they get legalized to loads from a constant pool.
17690 static SDValue PerformShiftCombine(SDNode *N,
17691 TargetLowering::DAGCombinerInfo &DCI,
17692 const ARMSubtarget *ST) {
17693 SelectionDAG &DAG = DCI.DAG;
17694 EVT VT = N->getValueType(0);
17695
17696 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17697 N->getOperand(0)->getOpcode() == ISD::AND &&
17698 N->getOperand(0)->hasOneUse()) {
17699 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17700 return SDValue();
17701 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17702 // usually show up because instcombine prefers to canonicalize it to
17703 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17704 // out of GEP lowering in some cases.
17705 SDValue N0 = N->getOperand(0);
17706 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17707 if (!ShiftAmtNode)
17708 return SDValue();
17709 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17710 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17711 if (!AndMaskNode)
17712 return SDValue();
17713 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17714 // Don't transform uxtb/uxth.
17715 if (AndMask == 255 || AndMask == 65535)
17716 return SDValue();
17717 if (isMask_32(AndMask)) {
17718 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17719 if (MaskedBits > ShiftAmt) {
17720 SDLoc DL(N);
17721 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17722 DAG.getConstant(MaskedBits, DL, MVT::i32));
17723 return DAG.getNode(
17724 ISD::SRL, DL, MVT::i32, SHL,
17725 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17726 }
17727 }
17728 }
17729
17730 // Nothing to be done for scalar shifts.
17731 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17732 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17733 return SDValue();
17734 if (ST->hasMVEIntegerOps())
17735 return SDValue();
17736
17737 int64_t Cnt;
17738
17739 switch (N->getOpcode()) {
17740 default: llvm_unreachable("unexpected shift opcode");
17741
17742 case ISD::SHL:
17743 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17744 SDLoc dl(N);
17745 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17746 DAG.getConstant(Cnt, dl, MVT::i32));
17747 }
17748 break;
17749
17750 case ISD::SRA:
17751 case ISD::SRL:
17752 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17753 unsigned VShiftOpc =
17754 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17755 SDLoc dl(N);
17756 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17757 DAG.getConstant(Cnt, dl, MVT::i32));
17758 }
17759 }
17760 return SDValue();
17761}
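// Illustrative example of the Thumb1 fold above: (shl (and x, 0x3ff), 2) has
// MaskedBits = 22 > ShiftAmt = 2, so it becomes (srl (shl x, 22), 20), two
// shifts that need no separately materialized mask constant.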
17762
17763 // Look for a sign/zero/fp extend of a larger-than-legal load. This can be
17764// split into multiple extending loads, which are simpler to deal with than an
17765// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17766// to convert the type to an f32.
17767 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17768 SDValue N0 = N->getOperand(0);
17769 if (N0.getOpcode() != ISD::LOAD)
17770 return SDValue();
17771 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17772 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17773 LD->getExtensionType() != ISD::NON_EXTLOAD)
17774 return SDValue();
17775 EVT FromVT = LD->getValueType(0);
17776 EVT ToVT = N->getValueType(0);
17777 if (!ToVT.isVector())
17778 return SDValue();
17780 EVT ToEltVT = ToVT.getVectorElementType();
17781 EVT FromEltVT = FromVT.getVectorElementType();
17782
17783 unsigned NumElements = 0;
17784 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17785 NumElements = 4;
17786 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17787 NumElements = 4;
17788 if (NumElements == 0 ||
17789 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17790 FromVT.getVectorNumElements() % NumElements != 0 ||
17791 !isPowerOf2_32(NumElements))
17792 return SDValue();
17793
17794 LLVMContext &C = *DAG.getContext();
17795 SDLoc DL(LD);
17796 // Details about the old load
17797 SDValue Ch = LD->getChain();
17798 SDValue BasePtr = LD->getBasePtr();
17799 Align Alignment = LD->getOriginalAlign();
17800 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17801 AAMDNodes AAInfo = LD->getAAInfo();
17802
17803 ISD::LoadExtType NewExtType =
17804 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17805 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17806 EVT NewFromVT = EVT::getVectorVT(
17807 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17808 EVT NewToVT = EVT::getVectorVT(
17809 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17810
17811 SmallVector<SDValue, 4> Loads;
17812 SmallVector<SDValue, 4> Chains;
17813 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17814 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17815 SDValue NewPtr =
17816 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17817
17818 SDValue NewLoad =
17819 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17820 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17821 Alignment, MMOFlags, AAInfo);
17822 Loads.push_back(NewLoad);
17823 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17824 }
17825
17826 // Float truncs need to be extended with VCVTBs into their floating-point types.
17827 if (FromEltVT == MVT::f16) {
17828 SmallVector<SDValue, 4> Extends;
17829
17830 for (unsigned i = 0; i < Loads.size(); i++) {
17831 SDValue LoadBC =
17832 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17833 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17834 DAG.getConstant(0, DL, MVT::i32));
17835 Extends.push_back(FPExt);
17836 }
17837
17838 Loads = Extends;
17839 }
17840
17841 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17842 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17843 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17844}
17845
17846/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17847/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17848static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17849 const ARMSubtarget *ST) {
17850 SDValue N0 = N->getOperand(0);
17851
17852 // Check for sign- and zero-extensions of vector extract operations of 8- and
17853 // 16-bit vector elements. NEON and MVE support these directly. They are
17854 // handled during DAG combining because type legalization will promote them
17855 // to 32-bit types and it is messy to recognize the operations after that.
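  // Illustrative example (editorial): for a v8i16 vector %v,
  //   (i32 (sign_extend (extract_vector_elt %v, 3)))
  // becomes (ARMISD::VGETLANEs %v, 3), which should select to a signed lane
  // move such as vmov.s16, instead of a lane extract followed by a separate
  // extend once type legalization has promoted the element to i32.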
17856 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17857 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17858 SDValue Vec = N0.getOperand(0);
17859 SDValue Lane = N0.getOperand(1);
17860 EVT VT = N->getValueType(0);
17861 EVT EltVT = N0.getValueType();
17862 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17863
17864 if (VT == MVT::i32 &&
17865 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17866 TLI.isTypeLegal(Vec.getValueType()) &&
17867 isa<ConstantSDNode>(Lane)) {
17868
17869 unsigned Opc = 0;
17870 switch (N->getOpcode()) {
17871 default: llvm_unreachable("unexpected opcode");
17872 case ISD::SIGN_EXTEND:
17873 Opc = ARMISD::VGETLANEs;
17874 break;
17875 case ISD::ZERO_EXTEND:
17876 case ISD::ANY_EXTEND:
17877 Opc = ARMISD::VGETLANEu;
17878 break;
17879 }
17880 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17881 }
17882 }
17883
17884 if (ST->hasMVEIntegerOps())
17885 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17886 return NewLoad;
17887
17888 return SDValue();
17889}
17890
17891static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17892 const ARMSubtarget *ST) {
17893 if (ST->hasMVEFloatOps())
17894 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17895 return NewLoad;
17896
17897 return SDValue();
17898}
17899
17900// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17901// constant bounds.
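// Worked examples (editorial, constants chosen for exposition):
//   smin(smax(x, -128), 127): MinC == 127 == ~MaxC and MinC + 1 is a power of
//   two, so we emit (ARMISD::SSAT x, 7), a signed saturate into [-128, 127].
//   smin(smax(x, 0), 255): MaxC == 0, so we emit (ARMISD::USAT x, 8), an
//   unsigned saturate into [0, 255].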
17902static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17903 const ARMSubtarget *Subtarget) {
17904 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17905 !Subtarget->isThumb2())
17906 return SDValue();
17907
17908 EVT VT = Op.getValueType();
17909 SDValue Op0 = Op.getOperand(0);
17910
17911 if (VT != MVT::i32 ||
17912 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17913 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17914 !isa<ConstantSDNode>(Op0.getOperand(1)))
17915 return SDValue();
17916
17917 SDValue Min = Op;
17918 SDValue Max = Op0;
17919 SDValue Input = Op0.getOperand(0);
17920 if (Min.getOpcode() == ISD::SMAX)
17921 std::swap(Min, Max);
17922
17923 APInt MinC = Min.getConstantOperandAPInt(1);
17924 APInt MaxC = Max.getConstantOperandAPInt(1);
17925
17926 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17927 !(MinC + 1).isPowerOf2())
17928 return SDValue();
17929
17930 SDLoc DL(Op);
17931 if (MinC == ~MaxC)
17932 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17933 DAG.getConstant(MinC.countr_one(), DL, VT));
17934 if (MaxC == 0)
17935 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17936 DAG.getConstant(MinC.countr_one(), DL, VT));
17937
17938 return SDValue();
17939}
17940
17941/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17942/// saturates.
17943static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17944 const ARMSubtarget *ST) {
17945 EVT VT = N->getValueType(0);
17946 SDValue N0 = N->getOperand(0);
17947
17948 if (VT == MVT::i32)
17949 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17950
17951 if (!ST->hasMVEIntegerOps())
17952 return SDValue();
17953
17954 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17955 return V;
17956
17957 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17958 return SDValue();
17959
17960 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17961 // Check one is a smin and the other is a smax
17962 if (Min->getOpcode() != ISD::SMIN)
17963 std::swap(Min, Max);
17964 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17965 return false;
17966
17967 APInt SaturateC;
17968 if (VT == MVT::v4i32)
17969 SaturateC = APInt(32, (1 << 15) - 1, true);
17970 else //if (VT == MVT::v8i16)
17971 SaturateC = APInt(16, (1 << 7) - 1, true);
17972
17973 APInt MinC, MaxC;
17974 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17975 MinC != SaturateC)
17976 return false;
17977 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17978 MaxC != ~SaturateC)
17979 return false;
17980 return true;
17981 };
17982
17983 if (IsSignedSaturate(N, N0.getNode())) {
17984 SDLoc DL(N);
17985 MVT ExtVT, HalfVT;
17986 if (VT == MVT::v4i32) {
17987 HalfVT = MVT::v8i16;
17988 ExtVT = MVT::v4i16;
17989 } else { // if (VT == MVT::v8i16)
17990 HalfVT = MVT::v16i8;
17991 ExtVT = MVT::v8i8;
17992 }
17993
17994 // Create a VQMOVNB with undef top lanes, then sign extend the result into
17995 // the top half. That extend will hopefully be removed if only the bottom
17996 // bits are demanded (through a truncating store, for example).
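    // For example (editorial note): with VT == v4i32 this matches
    // smin(smax(x, splat(-32768)), splat(32767)); the VQMOVNs writes the
    // saturated values into the bottom v4i16 lanes (MVE VQMOVNB.S32), and the
    // sign_extend_inreg from v4i16 is expected to fold away when only the
    // bottom halves are demanded.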
17997 SDValue VQMOVN =
17998 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
17999 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18000 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18001 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18002 DAG.getValueType(ExtVT));
18003 }
18004
18005 auto IsUnsignedSaturate = [&](SDNode *Min) {
18006 // For unsigned, we just need to check for <= 0xffff
18007 if (Min->getOpcode() != ISD::UMIN)
18008 return false;
18009
18010 APInt SaturateC;
18011 if (VT == MVT::v4i32)
18012 SaturateC = APInt(32, (1 << 16) - 1, true);
18013 else //if (VT == MVT::v8i16)
18014 SaturateC = APInt(16, (1 << 8) - 1, true);
18015
18016 APInt MinC;
18017 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18018 MinC != SaturateC)
18019 return false;
18020 return true;
18021 };
18022
18023 if (IsUnsignedSaturate(N)) {
18024 SDLoc DL(N);
18025 MVT HalfVT;
18026 unsigned ExtConst;
18027 if (VT == MVT::v4i32) {
18028 HalfVT = MVT::v8i16;
18029 ExtConst = 0x0000FFFF;
18030 } else { //if (VT == MVT::v8i16)
18031 HalfVT = MVT::v16i8;
18032 ExtConst = 0x00FF;
18033 }
18034
18035 // Create a VQMOVNB with undef top lanes, then zero extend into the top half
18036 // with an AND. That extend will hopefully be removed if only the bottom bits
18037 // are demanded (through a truncating store, for example).
18038 SDValue VQMOVN =
18039 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18040 DAG.getConstant(0, DL, MVT::i32));
18041 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18042 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18043 DAG.getConstant(ExtConst, DL, VT));
18044 }
18045
18046 return SDValue();
18047}
18048
18049static const APInt *isPowerOf2Constant(SDValue V) {
18050 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18051 if (!C)
18052 return nullptr;
18053 const APInt *CV = &C->getAPIntValue();
18054 return CV->isPowerOf2() ? CV : nullptr;
18055}
18056
18057SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18058 // If we have a CMOV, OR and AND combination such as:
18059 // if (x & CN)
18060 // y |= CM;
18061 //
18062 // And:
18063 // * CN is a single bit;
18064 // * All bits covered by CM are known zero in y
18065 //
18066 // Then we can convert this into a sequence of BFI instructions. This will
18067 // always be a win if CM is a single bit, will always be no worse than the
18068 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18069 // three bits (due to the extra IT instruction).
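  // Worked example (editorial): for "if (x & 4) y |= 0x30" where bits 4-5 of
  // y are known zero, AndC == 4 gives BitInX == 2, so X becomes (srl x, 2);
  // OrCI == 0x30 then produces two ARMISD::BFI nodes with inverted single-bit
  // masks ~(1 << 4) and ~(1 << 5), copying bit 2 of x into bits 4 and 5 of y
  // instead of a TST + ORR (plus IT on Thumb) sequence.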
18070
18071 SDValue Op0 = CMOV->getOperand(0);
18072 SDValue Op1 = CMOV->getOperand(1);
18073 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18074 SDValue CmpZ = CMOV->getOperand(4);
18075
18076 // The compare must be against zero.
18077 if (!isNullConstant(CmpZ->getOperand(1)))
18078 return SDValue();
18079
18080 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18081 SDValue And = CmpZ->getOperand(0);
18082 if (And->getOpcode() != ISD::AND)
18083 return SDValue();
18084 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18085 if (!AndC)
18086 return SDValue();
18087 SDValue X = And->getOperand(0);
18088
18089 if (CC == ARMCC::EQ) {
18090 // We're performing an "equal to zero" compare. Swap the operands so we
18091 // canonicalize on a "not equal to zero" compare.
18092 std::swap(Op0, Op1);
18093 } else {
18094 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18095 }
18096
18097 if (Op1->getOpcode() != ISD::OR)
18098 return SDValue();
18099
18100 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18101 if (!OrC)
18102 return SDValue();
18103 SDValue Y = Op1->getOperand(0);
18104
18105 if (Op0 != Y)
18106 return SDValue();
18107
18108 // Now, is it profitable to continue?
18109 APInt OrCI = OrC->getAPIntValue();
18110 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18111 if (OrCI.popcount() > Heuristic)
18112 return SDValue();
18113
18114 // Lastly, can we determine that the bits defined by OrCI
18115 // are zero in Y?
18116 KnownBits Known = DAG.computeKnownBits(Y);
18117 if ((OrCI & Known.Zero) != OrCI)
18118 return SDValue();
18119
18120 // OK, we can do the combine.
18121 SDValue V = Y;
18122 SDLoc dl(X);
18123 EVT VT = X.getValueType();
18124 unsigned BitInX = AndC->logBase2();
18125
18126 if (BitInX != 0) {
18127 // We must shift X first.
18128 X = DAG.getNode(ISD::SRL, dl, VT, X,
18129 DAG.getConstant(BitInX, dl, VT));
18130 }
18131
18132 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18133 BitInY < NumActiveBits; ++BitInY) {
18134 if (OrCI[BitInY] == 0)
18135 continue;
18136 APInt Mask(VT.getSizeInBits(), 0);
18137 Mask.setBit(BitInY);
18138 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18139 // Confusingly, the operand is an *inverted* mask.
18140 DAG.getConstant(~Mask, dl, VT));
18141 }
18142
18143 return V;
18144}
18145
18146// Given N, the value controlling the conditional branch, search for the loop
18147// intrinsic, returning it, along with how the value is used. We need to handle
18148// patterns such as the following:
18149// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18150// (brcond (setcc (loop.decrement), 0, eq), exit)
18151// (brcond (setcc (loop.decrement), 0, ne), header)
18152static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18153 bool &Negate) {
18154 switch (N->getOpcode()) {
18155 default:
18156 break;
18157 case ISD::XOR: {
18158 if (!isa<ConstantSDNode>(N.getOperand(1)))
18159 return SDValue();
18160 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18161 return SDValue();
18162 Negate = !Negate;
18163 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18164 }
18165 case ISD::SETCC: {
18166 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18167 if (!Const)
18168 return SDValue();
18169 if (Const->isZero())
18170 Imm = 0;
18171 else if (Const->isOne())
18172 Imm = 1;
18173 else
18174 return SDValue();
18175 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18176 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18177 }
18178 case ISD::INTRINSIC_W_CHAIN: {
18179 unsigned IntOp = N.getConstantOperandVal(1);
18180 if (IntOp != Intrinsic::test_start_loop_iterations &&
18181 IntOp != Intrinsic::loop_decrement_reg)
18182 return SDValue();
18183 return N;
18184 }
18185 }
18186 return SDValue();
18187}
18188
18189static SDValue PerformHWLoopCombine(SDNode *N,
18190 TargetLowering::DAGCombinerInfo &DCI,
18191 const ARMSubtarget *ST) {
18192
18193 // The hwloop intrinsics that we're interested are used for control-flow,
18194 // either for entering or exiting the loop:
18195 // - test.start.loop.iterations will test whether its operand is zero. If it
18196 // is zero, the following branch should not enter the loop.
18197 // - loop.decrement.reg also tests whether its operand is zero. If it is
18198 // zero, the following branch should not branch back to the beginning of
18199 // the loop.
18200 // So here, we need to check how the brcond is using the result of each
18201 // of the intrinsics to ensure that we're branching to the right place at the
18202 // right time.
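  // For example (editorial note): for
  //   (brcond (setcc (test.start.loop.iterations N), 0, eq), exit)
  // SearchLoopIntrinsic returns the intrinsic with CC == SETEQ and Imm == 0,
  // IsTrueIfZero holds, and we emit ARMISD::WLSSETUP + ARMISD::WLS so that the
  // while-loop-start branches to 'exit' when the trip count is zero and falls
  // through into the loop otherwise.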
18203
18204 ISD::CondCode CC;
18205 SDValue Cond;
18206 int Imm = 1;
18207 bool Negate = false;
18208 SDValue Chain = N->getOperand(0);
18209 SDValue Dest;
18210
18211 if (N->getOpcode() == ISD::BRCOND) {
18212 CC = ISD::SETEQ;
18213 Cond = N->getOperand(1);
18214 Dest = N->getOperand(2);
18215 } else {
18216 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18217 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18218 Cond = N->getOperand(2);
18219 Dest = N->getOperand(4);
18220 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18221 if (!Const->isOne() && !Const->isZero())
18222 return SDValue();
18223 Imm = Const->getZExtValue();
18224 } else
18225 return SDValue();
18226 }
18227
18228 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18229 if (!Int)
18230 return SDValue();
18231
18232 if (Negate)
18233 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18234
18235 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18236 return (CC == ISD::SETEQ && Imm == 0) ||
18237 (CC == ISD::SETNE && Imm == 1) ||
18238 (CC == ISD::SETLT && Imm == 1) ||
18239 (CC == ISD::SETULT && Imm == 1);
18240 };
18241
18242 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18243 return (CC == ISD::SETEQ && Imm == 1) ||
18244 (CC == ISD::SETNE && Imm == 0) ||
18245 (CC == ISD::SETGT && Imm == 0) ||
18246 (CC == ISD::SETUGT && Imm == 0) ||
18247 (CC == ISD::SETGE && Imm == 1) ||
18248 (CC == ISD::SETUGE && Imm == 1);
18249 };
18250
18251 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18252 "unsupported condition");
18253
18254 SDLoc dl(Int);
18255 SelectionDAG &DAG = DCI.DAG;
18256 SDValue Elements = Int.getOperand(2);
18257 unsigned IntOp = Int->getConstantOperandVal(1);
18258 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18259 && "expected single br user");
18260 SDNode *Br = *N->use_begin();
18261 SDValue OtherTarget = Br->getOperand(1);
18262
18263 // Update the unconditional branch to branch to the given Dest.
18264 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18265 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18266 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18267 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18268 };
18269
18270 if (IntOp == Intrinsic::test_start_loop_iterations) {
18271 SDValue Res;
18272 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18273 // We expect this 'instruction' to branch when the counter is zero.
18274 if (IsTrueIfZero(CC, Imm)) {
18275 SDValue Ops[] = {Chain, Setup, Dest};
18276 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18277 } else {
18278 // The logic is the reverse of what we need for WLS, so find the other
18279 // basic block target: the target of the proceeding br.
18280 UpdateUncondBr(Br, Dest, DAG);
18281
18282 SDValue Ops[] = {Chain, Setup, OtherTarget};
18283 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18284 }
18285 // Update LR count to the new value
18286 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18287 // Update chain
18288 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18289 return Res;
18290 } else {
18291 SDValue Size =
18292 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18293 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18294 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18295 DAG.getVTList(MVT::i32, MVT::Other), Args);
18296 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18297
18298 // We expect this instruction to branch when the count is not zero.
18299 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18300
18301 // Update the unconditional branch to target the loop preheader if we've
18302 // found the condition has been reversed.
18303 if (Target == OtherTarget)
18304 UpdateUncondBr(Br, Dest, DAG);
18305
18306 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18307 SDValue(LoopDec.getNode(), 1), Chain);
18308
18309 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18310 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18311 }
18312 return SDValue();
18313}
18314
18315/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18316SDValue
18317ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18318 SDValue Cmp = N->getOperand(4);
18319 if (Cmp.getOpcode() != ARMISD::CMPZ)
18320 // Only looking at NE cases.
18321 return SDValue();
18322
18323 EVT VT = N->getValueType(0);
18324 SDLoc dl(N);
18325 SDValue LHS = Cmp.getOperand(0);
18326 SDValue RHS = Cmp.getOperand(1);
18327 SDValue Chain = N->getOperand(0);
18328 SDValue BB = N->getOperand(1);
18329 SDValue ARMcc = N->getOperand(2);
18330 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18331
18332 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18333 // -> (brcond Chain BB CC CPSR Cmp)
18334 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18335 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18336 LHS->getOperand(0)->hasOneUse() &&
18337 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18338 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18339 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18340 return DAG.getNode(
18341 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18342 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18343 }
18344
18345 return SDValue();
18346}
18347
18348/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18349SDValue
18350ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18351 SDValue Cmp = N->getOperand(4);
18352 if (Cmp.getOpcode() != ARMISD::CMPZ)
18353 // Only looking at EQ and NE cases.
18354 return SDValue();
18355
18356 EVT VT = N->getValueType(0);
18357 SDLoc dl(N);
18358 SDValue LHS = Cmp.getOperand(0);
18359 SDValue RHS = Cmp.getOperand(1);
18360 SDValue FalseVal = N->getOperand(0);
18361 SDValue TrueVal = N->getOperand(1);
18362 SDValue ARMcc = N->getOperand(2);
18363 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18364
18365 // BFI is only available on V6T2+.
18366 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18367 SDValue R = PerformCMOVToBFICombine(N, DAG);
18368 if (R)
18369 return R;
18370 }
18371
18372 // Simplify
18373 // mov r1, r0
18374 // cmp r1, x
18375 // mov r0, y
18376 // moveq r0, x
18377 // to
18378 // cmp r0, x
18379 // movne r0, y
18380 //
18381 // mov r1, r0
18382 // cmp r1, x
18383 // mov r0, x
18384 // movne r0, y
18385 // to
18386 // cmp r0, x
18387 // movne r0, y
18388 /// FIXME: Turn this into a target neutral optimization?
18389 SDValue Res;
18390 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18391 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18392 N->getOperand(3), Cmp);
18393 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18394 SDValue ARMcc;
18395 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18396 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18397 N->getOperand(3), NewCmp);
18398 }
18399
18400 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18401 // -> (cmov F T CC CPSR Cmp)
18402 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18403 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18404 isNullConstant(RHS)) {
18405 return DAG.getNode(
18406 LHS->getOperand(2), LHS->getOperand(3),
18407 LHS->getOperand(4));
18408 }
18409
18410 if (!VT.isInteger())
18411 return SDValue();
18412
18413 // Fold away an unnecessary CMPZ/CMOV
18414 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18415 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18416 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18417 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18418 N->getConstantOperandVal(2) == ARMCC::NE) {
18419 ARMCC::CondCodes Cond;
18420 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18421 if (N->getConstantOperandVal(2) == ARMCC::NE)
18422 Cond = ARMCC::getOppositeCondition(Cond);
18423 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18424 N->getOperand(1),
18425 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18426 N->getOperand(3), C);
18427 }
18428 }
18429
18430 // Materialize a boolean comparison for integers so we can avoid branching.
18431 if (isNullConstant(FalseVal)) {
18432 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18433 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18434 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18435 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18436 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
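        // (Editorial note) Concretely: if x == y then CTLZ(x - y) == 32 ==
        // 0b100000 and 32 >> 5 == 1; any nonzero difference has CTLZ in
        // [0, 31], so the shift right by 5 yields 0.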
18437 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18438 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18439 DAG.getConstant(5, dl, MVT::i32));
18440 } else {
18441 // CMOV 0, 1, ==, (CMPZ x, y) ->
18442 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18443 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18444 //
18445 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18446 // x != y. In other words, a carry C == 1 when x == y, C == 0
18447 // otherwise.
18448 // The final UADDO_CARRY computes
18449 // x - y + (0 - (x - y)) + C == C
18450 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18451 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18452 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18453 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18454 // actually.
18455 SDValue Carry =
18456 DAG.getNode(ISD::SUB, dl, MVT::i32,
18457 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18458 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18459 }
18460 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18461 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18462 // This seems pointless but will allow us to combine it further below.
18463 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18464 SDValue Sub =
18465 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18466 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18467 Sub.getValue(1), SDValue());
18468 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18469 N->getOperand(3), CPSRGlue.getValue(1));
18470 FalseVal = Sub;
18471 }
18472 } else if (isNullConstant(TrueVal)) {
18473 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18474 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18475 // This seems pointless but will allow us to combine it further below
18476 // Note that we change == for != as this is the dual for the case above.
18477 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18478 SDValue Sub =
18479 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18480 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18481 Sub.getValue(1), SDValue());
18482 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18483 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18484 N->getOperand(3), CPSRGlue.getValue(1));
18485 FalseVal = Sub;
18486 }
18487 }
18488
18489 // On Thumb1, the DAG above may be further combined if z is a power of 2
18490 // (z == 2 ^ K).
18491 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
18492 // t1 = (USUBO (SUB x, y), 1)
18493 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18494 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18495 //
18496 // This also handles the special case of comparing against zero; it's
18497 // essentially, the same pattern, except there's no SUBS:
18498 // CMOV x, z, !=, (CMPZ x, 0) ->
18499 // t1 = (USUBO x, 1)
18500 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18501 // Result = if K != 0 then (SHL t2:0, K) else t2:0
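  // (Editorial check of the arithmetic) Let d = x - y. t1 = (USUBO d, 1)
  // borrows exactly when d == 0, and t2 = d - (d - 1) - borrow = 1 - borrow,
  // so t2:0 is 1 when x != y and 0 when x == y; shifting left by K then
  // reproduces z or 0, matching the original CMOV.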
18502 const APInt *TrueConst;
18503 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18504 ((FalseVal.getOpcode() == ARMISD::SUBS &&
18505 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
18506 (FalseVal == LHS && isNullConstant(RHS))) &&
18507 (TrueConst = isPowerOf2Constant(TrueVal))) {
18508 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18509 unsigned ShiftAmount = TrueConst->logBase2();
18510 if (ShiftAmount)
18511 TrueVal = DAG.getConstant(1, dl, VT);
18512 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18513 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18514 Subc.getValue(1));
18515
18516 if (ShiftAmount)
18517 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18518 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18519 }
18520
18521 if (Res.getNode()) {
18522 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18523 // Capture demanded bits information that would be otherwise lost.
18524 if (Known.Zero == 0xfffffffe)
18525 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18526 DAG.getValueType(MVT::i1));
18527 else if (Known.Zero == 0xffffff00)
18528 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18529 DAG.getValueType(MVT::i8));
18530 else if (Known.Zero == 0xffff0000)
18531 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18532 DAG.getValueType(MVT::i16));
18533 }
18534
18535 return Res;
18536}
18537
18540 const ARMSubtarget *ST) {
18541 SelectionDAG &DAG = DCI.DAG;
18542 SDValue Src = N->getOperand(0);
18543 EVT DstVT = N->getValueType(0);
18544
18545 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18546 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18547 EVT SrcVT = Src.getValueType();
18548 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18549 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18550 }
18551
18552 // We may have a bitcast of something that has already had this bitcast
18553 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18554 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18555 Src = Src.getOperand(0);
18556
18557 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18558 // would be generated is at least the width of the element type.
18559 EVT SrcVT = Src.getValueType();
18560 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18561 Src.getOpcode() == ARMISD::VMVNIMM ||
18562 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18563 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18564 DAG.getDataLayout().isBigEndian())
18565 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18566
18567 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18568 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18569 return R;
18570
18571 return SDValue();
18572}
18573
18574// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18575// node into stack operations after legalizeOps.
18578 SelectionDAG &DAG = DCI.DAG;
18579 EVT VT = N->getValueType(0);
18580 SDLoc DL(N);
18581
18582 // MVETrunc(Undef, Undef) -> Undef
18583 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18584 return DAG.getUNDEF(VT);
18585
18586 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18587 if (N->getNumOperands() == 2 &&
18588 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18589 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18590 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18591 N->getOperand(0).getOperand(1),
18592 N->getOperand(1).getOperand(0),
18593 N->getOperand(1).getOperand(1));
18594
18595 // MVETrunc(shuffle, shuffle) -> VMOVN
18596 if (N->getNumOperands() == 2 &&
18597 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18598 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18599 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18600 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18601
18602 if (S0->getOperand(0) == S1->getOperand(0) &&
18603 S0->getOperand(1) == S1->getOperand(1)) {
18604 // Construct complete shuffle mask
18605 SmallVector<int, 8> Mask(S0->getMask());
18606 Mask.append(S1->getMask().begin(), S1->getMask().end());
18607
18608 if (isVMOVNTruncMask(Mask, VT, false))
18609 return DAG.getNode(
18610 ARMISD::VMOVN, DL, VT,
18611 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18612 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18613 DAG.getConstant(1, DL, MVT::i32));
18614 if (isVMOVNTruncMask(Mask, VT, true))
18615 return DAG.getNode(
18616 ARMISD::VMOVN, DL, VT,
18617 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18618 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18619 DAG.getConstant(1, DL, MVT::i32));
18620 }
18621 }
18622
18623 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18624 // truncate to a buildvector to allow the generic optimisations to kick in.
18625 if (all_of(N->ops(), [](SDValue Op) {
18626 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18627 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18628 (Op.getOpcode() == ISD::BITCAST &&
18629 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18630 })) {
18631 SmallVector<SDValue, 8> Extracts;
18632 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18633 SDValue O = N->getOperand(Op);
18634 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18635 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18636 DAG.getConstant(i, DL, MVT::i32));
18637 Extracts.push_back(Ext);
18638 }
18639 }
18640 return DAG.getBuildVector(VT, DL, Extracts);
18641 }
18642
18643 // If we are late in the legalization process and nothing has optimised
18644 // the trunc to anything better, lower it to a stack store and reload,
18645 // performing the truncation whilst keeping the lanes in the correct order:
18646 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
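  // Illustrative shape (editorial): an MVETRUNC of two v4i32 operands into a
  // v8i16 result uses StoreVT == v4i16, truncstores operand 0 at offset 0 and
  // operand 1 at offset 8 of the 16-byte slot, then reloads the whole slot as
  // a single v8i16 vector.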
18647 if (!DCI.isAfterLegalizeDAG())
18648 return SDValue();
18649
18650 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18651 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18652 int NumIns = N->getNumOperands();
18653 assert((NumIns == 2 || NumIns == 4) &&
18654 "Expected 2 or 4 inputs to an MVETrunc");
18655 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18656 if (N->getNumOperands() == 4)
18657 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18658
18659 SmallVector<SDValue> Chains;
18660 for (int I = 0; I < NumIns; I++) {
18661 SDValue Ptr = DAG.getNode(
18662 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18663 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18664 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18665 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18666 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18667 Ptr, MPI, StoreVT, Align(4));
18668 Chains.push_back(Ch);
18669 }
18670
18671 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18672 MachinePointerInfo MPI =
18673 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18674 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18675}
18676
18677// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
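// Illustrative example (editorial sketch): an MVESEXT of (load <8 x i16>)
// producing two v4i32 results becomes two v4i16 -> v4i32 sextloads from byte
// offsets 0 and 8, returned via getMergeValues, so each half can select to a
// widening load such as VLDRH.S32.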
18678static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18679 SelectionDAG &DAG) {
18680 SDValue N0 = N->getOperand(0);
18681 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18682 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18683 return SDValue();
18684
18685 EVT FromVT = LD->getMemoryVT();
18686 EVT ToVT = N->getValueType(0);
18687 if (!ToVT.isVector())
18688 return SDValue();
18689 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18690 EVT ToEltVT = ToVT.getVectorElementType();
18691 EVT FromEltVT = FromVT.getVectorElementType();
18692
18693 unsigned NumElements = 0;
18694 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18695 NumElements = 4;
18696 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18697 NumElements = 8;
18698 assert(NumElements != 0);
18699
18700 ISD::LoadExtType NewExtType =
18701 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18702 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18703 LD->getExtensionType() != ISD::EXTLOAD &&
18704 LD->getExtensionType() != NewExtType)
18705 return SDValue();
18706
18707 LLVMContext &C = *DAG.getContext();
18708 SDLoc DL(LD);
18709 // Details about the old load
18710 SDValue Ch = LD->getChain();
18711 SDValue BasePtr = LD->getBasePtr();
18712 Align Alignment = LD->getOriginalAlign();
18713 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18714 AAMDNodes AAInfo = LD->getAAInfo();
18715
18716 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18717 EVT NewFromVT = EVT::getVectorVT(
18718 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18719 EVT NewToVT = EVT::getVectorVT(
18720 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18721
18722 SmallVector<SDValue, 4> Loads;
18723 SmallVector<SDValue, 4> Chains;
18724 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18725 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18726 SDValue NewPtr =
18727 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18728
18729 SDValue NewLoad =
18730 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18731 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18732 Alignment, MMOFlags, AAInfo);
18733 Loads.push_back(NewLoad);
18734 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18735 }
18736
18737 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18738 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18739 return DAG.getMergeValues(Loads, DL);
18740}
18741
18742// Perform combines for MVEEXT. If it has not been optimized to anything better
18743// before lowering, it gets converted to a stack store and extloads performing
18744// the extend whilst still keeping the same lane ordering.
18747 SelectionDAG &DAG = DCI.DAG;
18748 EVT VT = N->getValueType(0);
18749 SDLoc DL(N);
18750 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18751 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18752
18753 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18754 *DAG.getContext());
18755 auto Extend = [&](SDValue V) {
18756 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18757 return N->getOpcode() == ARMISD::MVESEXT
18758 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18759 DAG.getValueType(ExtVT))
18760 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18761 };
18762
18763 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18764 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18765 SDValue Ext = Extend(N->getOperand(0));
18766 return DAG.getMergeValues({Ext, Ext}, DL);
18767 }
18768
18769 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18770 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18771 ArrayRef<int> Mask = SVN->getMask();
18772 assert(Mask.size() == 2 * VT.getVectorNumElements());
18773 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18774 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18775 SDValue Op0 = SVN->getOperand(0);
18776 SDValue Op1 = SVN->getOperand(1);
18777
18778 auto CheckInregMask = [&](int Start, int Offset) {
18779 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18780 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18781 return false;
18782 return true;
18783 };
18784 SDValue V0 = SDValue(N, 0);
18785 SDValue V1 = SDValue(N, 1);
18786 if (CheckInregMask(0, 0))
18787 V0 = Extend(Op0);
18788 else if (CheckInregMask(0, 1))
18789 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18790 else if (CheckInregMask(0, Mask.size()))
18791 V0 = Extend(Op1);
18792 else if (CheckInregMask(0, Mask.size() + 1))
18793 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18794
18795 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18796 V1 = Extend(Op1);
18797 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18798 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18799 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18800 V1 = Extend(Op0);
18801 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18802 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18803
18804 if (V0.getNode() != N || V1.getNode() != N)
18805 return DAG.getMergeValues({V0, V1}, DL);
18806 }
18807
18808 // MVEEXT(load) -> extload, extload
18809 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18810 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18811 return L;
18812
18813 if (!DCI.isAfterLegalizeDAG())
18814 return SDValue();
18815
18816 // Lower to a stack store and reload:
18817 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18818 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18819 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18820 int NumOuts = N->getNumValues();
18821 assert((NumOuts == 2 || NumOuts == 4) &&
18822 "Expected 2 or 4 outputs to an MVEEXT");
18823 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18824 *DAG.getContext());
18825 if (N->getNumOperands() == 4)
18826 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18827
18828 MachinePointerInfo MPI =
18829 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18830 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18831 StackPtr, MPI, Align(4));
18832
18834 for (int I = 0; I < NumOuts; I++) {
18835 SDValue Ptr = DAG.getNode(
18836 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18837 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18838 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18839 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18840 SDValue Load = DAG.getExtLoad(
18841 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18842 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18843 Loads.push_back(Load);
18844 }
18845
18846 return DAG.getMergeValues(Loads, DL);
18847}
18848
18849SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18850 DAGCombinerInfo &DCI) const {
18851 switch (N->getOpcode()) {
18852 default: break;
18853 case ISD::SELECT_CC:
18854 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18855 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18856 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18857 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
18858 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18859 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18860 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18861 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18862 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18863 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18864 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18865 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18866 case ISD::BRCOND:
18867 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18868 case ARMISD::ADDC:
18869 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18870 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18871 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18872 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18873 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18874 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18875 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18876 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18877 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18879 case ISD::EXTRACT_VECTOR_ELT:
18880 return PerformExtractEltCombine(N, DCI, Subtarget);
18884 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18885 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18886 case ISD::FP_TO_SINT:
18887 case ISD::FP_TO_UINT:
18888 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18889 case ISD::FADD:
18890 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18891 case ISD::FDIV:
18892 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
18893 case ISD::INTRINSIC_WO_CHAIN:
18894 return PerformIntrinsicCombine(N, DCI);
18895 case ISD::SHL:
18896 case ISD::SRA:
18897 case ISD::SRL:
18898 return PerformShiftCombine(N, DCI, Subtarget);
18899 case ISD::SIGN_EXTEND:
18900 case ISD::ZERO_EXTEND:
18901 case ISD::ANY_EXTEND:
18902 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18903 case ISD::FP_EXTEND:
18904 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18905 case ISD::SMIN:
18906 case ISD::UMIN:
18907 case ISD::SMAX:
18908 case ISD::UMAX:
18909 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18910 case ARMISD::CMOV:
18911 return PerformCMOVCombine(N, DCI.DAG);
18912 case ARMISD::BRCOND:
18913 return PerformBRCONDCombine(N, DCI.DAG);
18914 case ARMISD::CMPZ:
18915 return PerformCMPZCombine(N, DCI.DAG);
18916 case ARMISD::CSINC:
18917 case ARMISD::CSINV:
18918 case ARMISD::CSNEG:
18919 return PerformCSETCombine(N, DCI.DAG);
18920 case ISD::LOAD:
18921 return PerformLOADCombine(N, DCI, Subtarget);
18922 case ARMISD::VLD1DUP:
18923 case ARMISD::VLD2DUP:
18924 case ARMISD::VLD3DUP:
18925 case ARMISD::VLD4DUP:
18926 return PerformVLDCombine(N, DCI);
18927 case ARMISD::BUILD_VECTOR:
18928 return PerformARMBUILD_VECTORCombine(N, DCI);
18929 case ISD::BITCAST:
18930 return PerformBITCASTCombine(N, DCI, Subtarget);
18931 case ARMISD::PREDICATE_CAST:
18932 return PerformPREDICATE_CASTCombine(N, DCI);
18933 case ARMISD::VECTOR_REG_CAST:
18934 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18935 case ARMISD::MVETRUNC:
18936 return PerformMVETruncCombine(N, DCI);
18937 case ARMISD::MVESEXT:
18938 case ARMISD::MVEZEXT:
18939 return PerformMVEExtCombine(N, DCI);
18940 case ARMISD::VCMP:
18941 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18942 case ISD::VECREDUCE_ADD:
18943 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18944 case ARMISD::VADDVs:
18945 case ARMISD::VADDVu:
18946 case ARMISD::VADDLVs:
18947 case ARMISD::VADDLVu:
18948 case ARMISD::VADDLVAs:
18949 case ARMISD::VADDLVAu:
18950 case ARMISD::VMLAVs:
18951 case ARMISD::VMLAVu:
18952 case ARMISD::VMLALVs:
18953 case ARMISD::VMLALVu:
18954 case ARMISD::VMLALVAs:
18955 case ARMISD::VMLALVAu:
18956 return PerformReduceShuffleCombine(N, DCI.DAG);
18957 case ARMISD::VMOVN:
18958 return PerformVMOVNCombine(N, DCI);
18959 case ARMISD::VQMOVNs:
18960 case ARMISD::VQMOVNu:
18961 return PerformVQMOVNCombine(N, DCI);
18962 case ARMISD::VQDMULH:
18963 return PerformVQDMULHCombine(N, DCI);
18964 case ARMISD::ASRL:
18965 case ARMISD::LSRL:
18966 case ARMISD::LSLL:
18967 return PerformLongShiftCombine(N, DCI.DAG);
18968 case ARMISD::SMULWB: {
18969 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18970 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18971 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18972 return SDValue();
18973 break;
18974 }
18975 case ARMISD::SMULWT: {
18976 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18977 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18978 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18979 return SDValue();
18980 break;
18981 }
18982 case ARMISD::SMLALBB:
18983 case ARMISD::QADD16b:
18984 case ARMISD::QSUB16b:
18985 case ARMISD::UQADD16b:
18986 case ARMISD::UQSUB16b: {
18987 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18988 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18989 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18990 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18991 return SDValue();
18992 break;
18993 }
18994 case ARMISD::SMLALBT: {
18995 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
18996 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18997 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
18998 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18999 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19000 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19001 return SDValue();
19002 break;
19003 }
19004 case ARMISD::SMLALTB: {
19005 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19006 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19007 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19008 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19009 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19010 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19011 return SDValue();
19012 break;
19013 }
19014 case ARMISD::SMLALTT: {
19015 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19016 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19017 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19018 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19019 return SDValue();
19020 break;
19021 }
19022 case ARMISD::QADD8b:
19023 case ARMISD::QSUB8b:
19024 case ARMISD::UQADD8b:
19025 case ARMISD::UQSUB8b: {
19026 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19027 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19028 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19029 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19030 return SDValue();
19031 break;
19032 }
19033 case ISD::INTRINSIC_VOID:
19034 case ISD::INTRINSIC_W_CHAIN:
19035 switch (N->getConstantOperandVal(1)) {
19036 case Intrinsic::arm_neon_vld1:
19037 case Intrinsic::arm_neon_vld1x2:
19038 case Intrinsic::arm_neon_vld1x3:
19039 case Intrinsic::arm_neon_vld1x4:
19040 case Intrinsic::arm_neon_vld2:
19041 case Intrinsic::arm_neon_vld3:
19042 case Intrinsic::arm_neon_vld4:
19043 case Intrinsic::arm_neon_vld2lane:
19044 case Intrinsic::arm_neon_vld3lane:
19045 case Intrinsic::arm_neon_vld4lane:
19046 case Intrinsic::arm_neon_vld2dup:
19047 case Intrinsic::arm_neon_vld3dup:
19048 case Intrinsic::arm_neon_vld4dup:
19049 case Intrinsic::arm_neon_vst1:
19050 case Intrinsic::arm_neon_vst1x2:
19051 case Intrinsic::arm_neon_vst1x3:
19052 case Intrinsic::arm_neon_vst1x4:
19053 case Intrinsic::arm_neon_vst2:
19054 case Intrinsic::arm_neon_vst3:
19055 case Intrinsic::arm_neon_vst4:
19056 case Intrinsic::arm_neon_vst2lane:
19057 case Intrinsic::arm_neon_vst3lane:
19058 case Intrinsic::arm_neon_vst4lane:
19059 return PerformVLDCombine(N, DCI);
19060 case Intrinsic::arm_mve_vld2q:
19061 case Intrinsic::arm_mve_vld4q:
19062 case Intrinsic::arm_mve_vst2q:
19063 case Intrinsic::arm_mve_vst4q:
19064 return PerformMVEVLDCombine(N, DCI);
19065 default: break;
19066 }
19067 break;
19068 }
19069 return SDValue();
19070}
19071
19072bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19073 EVT VT) const {
19074 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19075}
19076
19077bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19078 Align Alignment,
19079 MachineMemOperand::Flags,
19080 unsigned *Fast) const {
19081 // Depends what it gets converted into if the type is weird.
19082 if (!VT.isSimple())
19083 return false;
19084
19085 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19086 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19087 auto Ty = VT.getSimpleVT().SimpleTy;
19088
19089 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19090 // Unaligned access can use (for example) LDRB, LDRH, LDR
19091 if (AllowsUnaligned) {
19092 if (Fast)
19093 *Fast = Subtarget->hasV7Ops();
19094 return true;
19095 }
19096 }
19097
19098 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19099 // For any little-endian targets with neon, we can support unaligned ld/st
19100 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19101 // A big-endian target may also explicitly support unaligned accesses
19102 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19103 if (Fast)
19104 *Fast = 1;
19105 return true;
19106 }
19107 }
19108
19109 if (!Subtarget->hasMVEIntegerOps())
19110 return false;
19111
19112 // These are for predicates
19113 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19114 Ty == MVT::v2i1)) {
19115 if (Fast)
19116 *Fast = 1;
19117 return true;
19118 }
19119
19120 // These are for truncated stores/narrowing loads. They are fine so long as
19121 // the alignment is at least the size of the item being loaded
19122 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19123 Alignment >= VT.getScalarSizeInBits() / 8) {
19124 if (Fast)
19125 *Fast = true;
19126 return true;
19127 }
19128
19129 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19130 // VSTRW.U32 all store the vector register in exactly the same format, and
19131 // differ only in the range of their immediate offset field and the required
19132 // alignment. So there is always a store that can be used, regardless of
19133 // actual type.
19134 //
19135 // For big endian, that is not the case, but we can still emit a (VSTRB.U8;
19136 // VREV64.8) pair and get the same effect. This will likely be better than
19137 // aligning the vector through the stack.
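  // For example (editorial note): an Align(1) access to an MVT::v4i32 vector
  // on a little-endian MVE target is reported as allowed (and fast) here,
  // since it can simply be selected as a byte-oriented VSTRB.U8 / VLDRB.U8
  // rather than being split or staged through an aligned stack slot.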
19138 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19139 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19140 Ty == MVT::v2f64) {
19141 if (Fast)
19142 *Fast = 1;
19143 return true;
19144 }
19145
19146 return false;
19147}
19148
19149
19150EVT ARMTargetLowering::getOptimalMemOpType(
19151 const MemOp &Op, const AttributeList &FuncAttributes) const {
19152 // See if we can use NEON instructions for this...
19153 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19154 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19155 unsigned Fast;
19156 if (Op.size() >= 16 &&
19157 (Op.isAligned(Align(16)) ||
19158 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19159 MachineMemOperand::MONone, &Fast) &&
19160 Fast))) {
19161 return MVT::v2f64;
19162 } else if (Op.size() >= 8 &&
19163 (Op.isAligned(Align(8)) ||
19165 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19166 Fast))) {
19167 return MVT::f64;
19168 }
19169 }
19170
19171 // Let the target-independent logic figure it out.
19172 return MVT::Other;
19173}
19174
19175// 64-bit integers are split into their high and low parts and held in two
19176// different registers, so the trunc is free since the low register can just
19177// be used.
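// For example (editorial note): "trunc i64 %x to i32" just reuses the register
// already holding the low 32 bits of %x, so no instruction is needed and the
// i64 -> i32 case below reports the truncate as free.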
19178bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19179 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19180 return false;
19181 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19182 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19183 return (SrcBits == 64 && DestBits == 32);
19184}
19185
19186bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19187 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19188 !DstVT.isInteger())
19189 return false;
19190 unsigned SrcBits = SrcVT.getSizeInBits();
19191 unsigned DestBits = DstVT.getSizeInBits();
19192 return (SrcBits == 64 && DestBits == 32);
19193}
19194
19195bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19196 if (Val.getOpcode() != ISD::LOAD)
19197 return false;
19198
19199 EVT VT1 = Val.getValueType();
19200 if (!VT1.isSimple() || !VT1.isInteger() ||
19201 !VT2.isSimple() || !VT2.isInteger())
19202 return false;
19203
19204 switch (VT1.getSimpleVT().SimpleTy) {
19205 default: break;
19206 case MVT::i1:
19207 case MVT::i8:
19208 case MVT::i16:
19209 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19210 return true;
19211 }
19212
19213 return false;
19214}
19215
19216bool ARMTargetLowering::isFNegFree(EVT VT) const {
19217 if (!VT.isSimple())
19218 return false;
19219
19220 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19221 // negate values directly (fneg is free). So, we don't want to let the DAG
19222 // combiner rewrite fneg into xors and some other instructions. For f16 and
19223 // FullFP16 argument passing, some bitcast nodes may be introduced,
19224 // triggering this DAG combine rewrite, so we are avoiding that with this.
19225 switch (VT.getSimpleVT().SimpleTy) {
19226 default: break;
19227 case MVT::f16:
19228 return Subtarget->hasFullFP16();
19229 }
19230
19231 return false;
19232}
19233
19234/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19235/// of the vector elements.
19236static bool areExtractExts(Value *Ext1, Value *Ext2) {
19237 auto areExtDoubled = [](Instruction *Ext) {
19238 return Ext->getType()->getScalarSizeInBits() ==
19239 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19240 };
19241
19242 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19243 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19244 !areExtDoubled(cast<Instruction>(Ext1)) ||
19245 !areExtDoubled(cast<Instruction>(Ext2)))
19246 return false;
19247
19248 return true;
19249}
19250
19251/// Check if sinking \p I's operands to I's basic block is profitable, because
19252/// the operands can be folded into a target instruction, e.g.
19253/// sext/zext can be folded into vsubl.
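/// (Editorial illustration, not from the original source.) A typical candidate
/// is a splat defined in another block:
///   %i = insertelement <4 x i32> poison, i32 %s, i64 0
///   %splat = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
///   ...
///   %a = add <4 x i32> %x, %splat   ; in I's block
/// Sinking %i and %splat next to %a lets instruction selection keep %s in a
/// GPR and fold the splat into the MVE operation (e.g. vadd.i32 q0, q1, r0).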
19254bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19255 SmallVectorImpl<Use *> &Ops) const {
19256 if (!I->getType()->isVectorTy())
19257 return false;
19258
19259 if (Subtarget->hasNEON()) {
19260 switch (I->getOpcode()) {
19261 case Instruction::Sub:
19262 case Instruction::Add: {
19263 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19264 return false;
19265 Ops.push_back(&I->getOperandUse(0));
19266 Ops.push_back(&I->getOperandUse(1));
19267 return true;
19268 }
19269 default:
19270 return false;
19271 }
19272 }
19273
19274 if (!Subtarget->hasMVEIntegerOps())
19275 return false;
19276
19277 auto IsFMSMul = [&](Instruction *I) {
19278 if (!I->hasOneUse())
19279 return false;
19280 auto *Sub = cast<Instruction>(*I->users().begin());
19281 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19282 };
19283 auto IsFMS = [&](Instruction *I) {
19284 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19285 match(I->getOperand(1), m_FNeg(m_Value())))
19286 return true;
19287 return false;
19288 };
19289
19290 auto IsSinker = [&](Instruction *I, int Operand) {
19291 switch (I->getOpcode()) {
19292 case Instruction::Add:
19293 case Instruction::Mul:
19294 case Instruction::FAdd:
19295 case Instruction::ICmp:
19296 case Instruction::FCmp:
19297 return true;
19298 case Instruction::FMul:
19299 return !IsFMSMul(I);
19300 case Instruction::Sub:
19301 case Instruction::FSub:
19302 case Instruction::Shl:
19303 case Instruction::LShr:
19304 case Instruction::AShr:
19305 return Operand == 1;
19306 case Instruction::Call:
19307 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19308 switch (II->getIntrinsicID()) {
19309 case Intrinsic::fma:
19310 return !IsFMS(I);
19311 case Intrinsic::sadd_sat:
19312 case Intrinsic::uadd_sat:
19313 case Intrinsic::arm_mve_add_predicated:
19314 case Intrinsic::arm_mve_mul_predicated:
19315 case Intrinsic::arm_mve_qadd_predicated:
19316 case Intrinsic::arm_mve_vhadd:
19317 case Intrinsic::arm_mve_hadd_predicated:
19318 case Intrinsic::arm_mve_vqdmull:
19319 case Intrinsic::arm_mve_vqdmull_predicated:
19320 case Intrinsic::arm_mve_vqdmulh:
19321 case Intrinsic::arm_mve_qdmulh_predicated:
19322 case Intrinsic::arm_mve_vqrdmulh:
19323 case Intrinsic::arm_mve_qrdmulh_predicated:
19324 case Intrinsic::arm_mve_fma_predicated:
19325 return true;
19326 case Intrinsic::ssub_sat:
19327 case Intrinsic::usub_sat:
19328 case Intrinsic::arm_mve_sub_predicated:
19329 case Intrinsic::arm_mve_qsub_predicated:
19330 case Intrinsic::arm_mve_hsub_predicated:
19331 case Intrinsic::arm_mve_vhsub:
19332 return Operand == 1;
19333 default:
19334 return false;
19335 }
19336 }
19337 return false;
19338 default:
19339 return false;
19340 }
19341 };
19342
19343 for (auto OpIdx : enumerate(I->operands())) {
19344 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19345 // Make sure we are not already sinking this operand
19346 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19347 continue;
19348
19349 Instruction *Shuffle = Op;
19350 if (Shuffle->getOpcode() == Instruction::BitCast)
19351 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19352 // We are looking for a splat that can be sunk.
19353 if (!Shuffle ||
19354 !match(Shuffle, m_Shuffle(
19355 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19356 m_Undef(), m_ZeroMask())))
19357 continue;
19358 if (!IsSinker(I, OpIdx.index()))
19359 continue;
19360
19361 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19362 // and vector registers
19363 for (Use &U : Op->uses()) {
19364 Instruction *Insn = cast<Instruction>(U.getUser());
19365 if (!IsSinker(Insn, U.getOperandNo()))
19366 return false;
19367 }
19368
19369 Ops.push_back(&Shuffle->getOperandUse(0));
19370 if (Shuffle != Op)
19371 Ops.push_back(&Op->getOperandUse(0));
19372 Ops.push_back(&OpIdx.value());
19373 }
19374 return true;
19375}
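// Illustrative sketch (annotation, not part of the upstream file): for MVE,
// a splat built as
//   %ins = insertelement <4 x i32> undef, i32 %s, i32 0
//   %spl = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   %r   = add <4 x i32> %v, %spl
// has the insert/shuffle sunk next to the add so instruction selection can
// use the scalar-operand form (e.g. vadd.i32 q0, q1, r0) instead of
// materialising the splat in a vector register.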
19376
19377Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19378 if (!Subtarget->hasMVEIntegerOps())
19379 return nullptr;
19380 Type *SVIType = SVI->getType();
19381 Type *ScalarType = SVIType->getScalarType();
19382
19383 if (ScalarType->isFloatTy())
19384 return Type::getInt32Ty(SVIType->getContext());
19385 if (ScalarType->isHalfTy())
19386 return Type::getInt16Ty(SVIType->getContext());
19387 return nullptr;
19388}
19389
19390bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19391 EVT VT = ExtVal.getValueType();
19392
19393 if (!isTypeLegal(VT))
19394 return false;
19395
19396 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19397 if (Ld->isExpandingLoad())
19398 return false;
19399 }
19400
19401 if (Subtarget->hasMVEIntegerOps())
19402 return true;
19403
19404 // Don't create a loadext if we can fold the extension into a wide/long
19405 // instruction.
19406 // If there's more than one user instruction, the loadext is desirable no
19407 // matter what. There can be two uses by the same instruction.
19408 if (ExtVal->use_empty() ||
19409 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19410 return true;
19411
19412 SDNode *U = *ExtVal->use_begin();
19413 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19414 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19415 return false;
19416
19417 return true;
19418}
19419
19420bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19421 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19422 return false;
19423
19424 if (!isTypeLegal(EVT::getEVT(Ty1)))
19425 return false;
19426
19427 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19428
19429 // Assuming the caller doesn't have a zeroext or signext return parameter,
19430 // truncation all the way down to i1 is valid.
19431 return true;
19432}
19433
19434/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19435/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19436/// expanded to FMAs when this method returns true, otherwise fmuladd is
19437/// expanded to fmul + fadd.
19438///
19439/// ARM supports both fused and unfused multiply-add operations; we already
19440/// lower a pair of fmul and fadd to the latter so it's not clear that there
19441/// would be a gain or that the gain would be worthwhile enough to risk
19442/// correctness bugs.
19443///
19444/// For MVE, we set this to true as it helps simplify the need for some
19445/// patterns (and we don't have the non-fused floating point instruction).
19446bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19447 EVT VT) const {
19448 if (!VT.isSimple())
19449 return false;
19450
19451 switch (VT.getSimpleVT().SimpleTy) {
19452 case MVT::v4f32:
19453 case MVT::v8f16:
19454 return Subtarget->hasMVEFloatOps();
19455 case MVT::f16:
19456 return Subtarget->useFPVFMx16();
19457 case MVT::f32:
19458 return Subtarget->useFPVFMx();
19459 case MVT::f64:
19460 return Subtarget->useFPVFMx64();
19461 default:
19462 break;
19463 }
19464
19465 return false;
19466}
19467
19468static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19469 if (V < 0)
19470 return false;
19471
19472 unsigned Scale = 1;
19473 switch (VT.getSimpleVT().SimpleTy) {
19474 case MVT::i1:
19475 case MVT::i8:
19476 // Scale == 1;
19477 break;
19478 case MVT::i16:
19479 // Scale == 2;
19480 Scale = 2;
19481 break;
19482 default:
19483 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19484 // Scale == 4;
19485 Scale = 4;
19486 break;
19487 }
19488
19489 if ((V & (Scale - 1)) != 0)
19490 return false;
19491 return isUInt<5>(V / Scale);
19492}
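// Worked examples (annotation, not part of the upstream file): the Thumb1
// offset is an unsigned 5-bit field scaled by the access size, so
//   isLegalT1AddressImmediate(31,  MVT::i8)  -> true   (imm5 * 1, max 31)
//   isLegalT1AddressImmediate(62,  MVT::i16) -> true   (imm5 * 2, max 62)
//   isLegalT1AddressImmediate(63,  MVT::i16) -> false  (not a multiple of 2)
//   isLegalT1AddressImmediate(124, MVT::i32) -> true   (imm5 * 4, max 124)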
19493
19494static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19495 const ARMSubtarget *Subtarget) {
19496 if (!VT.isInteger() && !VT.isFloatingPoint())
19497 return false;
19498 if (VT.isVector() && Subtarget->hasNEON())
19499 return false;
19500 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19501 !Subtarget->hasMVEFloatOps())
19502 return false;
19503
19504 bool IsNeg = false;
19505 if (V < 0) {
19506 IsNeg = true;
19507 V = -V;
19508 }
19509
19510 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19511
19512 // MVE: size * imm7
19513 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19514 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19515 case MVT::i32:
19516 case MVT::f32:
19517 return isShiftedUInt<7,2>(V);
19518 case MVT::i16:
19519 case MVT::f16:
19520 return isShiftedUInt<7,1>(V);
19521 case MVT::i8:
19522 return isUInt<7>(V);
19523 default:
19524 return false;
19525 }
19526 }
19527
19528 // half VLDR: 2 * imm8
19529 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19530 return isShiftedUInt<8, 1>(V);
19531 // VLDR and LDRD: 4 * imm8
19532 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19533 return isShiftedUInt<8, 2>(V);
19534
19535 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19536 // + imm12 or - imm8
19537 if (IsNeg)
19538 return isUInt<8>(V);
19539 return isUInt<12>(V);
19540 }
19541
19542 return false;
19543}
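// Worked examples (annotation, not part of the upstream file): for MVE
// vector accesses the offset is a 7-bit immediate scaled by the element
// size, so the accepted ranges are:
//   v4i32/v4f32 : multiples of 4 up to +/-508
//   v8i16/v8f16 : multiples of 2 up to +/-254
//   v16i8       : any offset up to +/-127
// Scalar 1/2/4-byte accesses fall through to the usual Thumb-2
// +imm12 / -imm8 rule.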
19544
19545/// isLegalAddressImmediate - Return true if the integer value can be used
19546/// as the offset of the target addressing mode for load / store of the
19547/// given type.
19548static bool isLegalAddressImmediate(int64_t V, EVT VT,
19549 const ARMSubtarget *Subtarget) {
19550 if (V == 0)
19551 return true;
19552
19553 if (!VT.isSimple())
19554 return false;
19555
19556 if (Subtarget->isThumb1Only())
19557 return isLegalT1AddressImmediate(V, VT);
19558 else if (Subtarget->isThumb2())
19559 return isLegalT2AddressImmediate(V, VT, Subtarget);
19560
19561 // ARM mode.
19562 if (V < 0)
19563 V = - V;
19564 switch (VT.getSimpleVT().SimpleTy) {
19565 default: return false;
19566 case MVT::i1:
19567 case MVT::i8:
19568 case MVT::i32:
19569 // +- imm12
19570 return isUInt<12>(V);
19571 case MVT::i16:
19572 // +- imm8
19573 return isUInt<8>(V);
19574 case MVT::f32:
19575 case MVT::f64:
19576 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19577 return false;
19578 return isShiftedUInt<8, 2>(V);
19579 }
19580}
19581
19582bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19583 EVT VT) const {
19584 int Scale = AM.Scale;
19585 if (Scale < 0)
19586 return false;
19587
19588 switch (VT.getSimpleVT().SimpleTy) {
19589 default: return false;
19590 case MVT::i1:
19591 case MVT::i8:
19592 case MVT::i16:
19593 case MVT::i32:
19594 if (Scale == 1)
19595 return true;
19596 // r + r << imm
19597 Scale = Scale & ~1;
19598 return Scale == 2 || Scale == 4 || Scale == 8;
19599 case MVT::i64:
19600 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19601 // version in Thumb mode.
19602 // r + r
19603 if (Scale == 1)
19604 return true;
19605 // r * 2 (this can be lowered to r + r).
19606 if (!AM.HasBaseReg && Scale == 2)
19607 return true;
19608 return false;
19609 case MVT::isVoid:
19610 // Note, we allow "void" uses (basically, uses that aren't loads or
19611 // stores), because arm allows folding a scale into many arithmetic
19612 // operations. This should be made more precise and revisited later.
19613
19614 // Allow r << imm, but the imm has to be a multiple of two.
19615 if (Scale & 1) return false;
19616 return isPowerOf2_32(Scale);
19617 }
19618}
19619
19620bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19621 EVT VT) const {
19622 const int Scale = AM.Scale;
19623
19624 // Negative scales are not supported in Thumb1.
19625 if (Scale < 0)
19626 return false;
19627
19628 // Thumb1 addressing modes do not support register scaling excepting the
19629 // following cases:
19630 // 1. Scale == 1 means no scaling.
19631 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19632 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19633}
19634
19635/// isLegalAddressingMode - Return true if the addressing mode represented
19636/// by AM is legal for this target, for a load/store of the specified type.
19637bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19638 const AddrMode &AM, Type *Ty,
19639 unsigned AS, Instruction *I) const {
19640 EVT VT = getValueType(DL, Ty, true);
19641 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19642 return false;
19643
19644 // Can never fold addr of global into load/store.
19645 if (AM.BaseGV)
19646 return false;
19647
19648 switch (AM.Scale) {
19649 case 0: // no scale reg, must be "r+i" or "r", or "i".
19650 break;
19651 default:
19652 // ARM doesn't support any R+R*scale+imm addr modes.
19653 if (AM.BaseOffs)
19654 return false;
19655
19656 if (!VT.isSimple())
19657 return false;
19658
19659 if (Subtarget->isThumb1Only())
19660 return isLegalT1ScaledAddressingMode(AM, VT);
19661
19662 if (Subtarget->isThumb2())
19663 return isLegalT2ScaledAddressingMode(AM, VT);
19664
19665 int Scale = AM.Scale;
19666 switch (VT.getSimpleVT().SimpleTy) {
19667 default: return false;
19668 case MVT::i1:
19669 case MVT::i8:
19670 case MVT::i32:
19671 if (Scale < 0) Scale = -Scale;
19672 if (Scale == 1)
19673 return true;
19674 // r + r << imm
19675 return isPowerOf2_32(Scale & ~1);
19676 case MVT::i16:
19677 case MVT::i64:
19678 // r +/- r
19679 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19680 return true;
19681 // r * 2 (this can be lowered to r + r).
19682 if (!AM.HasBaseReg && Scale == 2)
19683 return true;
19684 return false;
19685
19686 case MVT::isVoid:
19687 // Note, we allow "void" uses (basically, uses that aren't loads or
19688 // stores), because arm allows folding a scale into many arithmetic
19689 // operations. This should be made more precise and revisited later.
19690
19691 // Allow r << imm, but the imm has to be a multiple of two.
19692 if (Scale & 1) return false;
19693 return isPowerOf2_32(Scale);
19694 }
19695 }
19696 return true;
19697}
19698
19699/// isLegalICmpImmediate - Return true if the specified immediate is legal
19700/// icmp immediate, that is the target has icmp instructions which can compare
19701/// a register against the immediate without having to materialize the
19702/// immediate into a register.
19703bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19704 // Thumb2 and ARM modes can use cmn for negative immediates.
19705 if (!Subtarget->isThumb())
19706 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19707 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19708 if (Subtarget->isThumb2())
19709 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19710 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19711 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19712 return Imm >= 0 && Imm <= 255;
19713}
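// Illustrative examples (annotation, not part of the upstream file):
//   ARM/Thumb-2: comparing against -1 is legal because `cmp r0, #-1` can be
//   emitted as `cmn r0, #1`, whose operand is a valid modified immediate.
//   Thumb-1: only plain 8-bit immediates are accepted, so comparing against
//   255 is legal while 256 or any negative value must first be materialised
//   into a register.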
19714
19715/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19716/// *or sub* immediate, that is the target has add or sub instructions which can
19717/// add a register with the immediate without having to materialize the
19718/// immediate into a register.
19719bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19720 // Same encoding for add/sub, just flip the sign.
19721 int64_t AbsImm = std::abs(Imm);
19722 if (!Subtarget->isThumb())
19723 return ARM_AM::getSOImmVal(AbsImm) != -1;
19724 if (Subtarget->isThumb2())
19725 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19726 // Thumb1 only has 8-bit unsigned immediate.
19727 return AbsImm >= 0 && AbsImm <= 255;
19728}
19729
19730// Return false to prevent folding
19731// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19732// if the folding leads to worse code.
19733bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19734 SDValue ConstNode) const {
19735 // Let the DAGCombiner decide for vector types and large types.
19736 const EVT VT = AddNode.getValueType();
19737 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19738 return true;
19739
19740 // It is worse if c0 is legal add immediate, while c1*c0 is not
19741 // and has to be composed by at least two instructions.
19742 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19743 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19744 const int64_t C0 = C0Node->getSExtValue();
19745 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19746 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19747 return true;
19748 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19749 return false;
19750
19751 // Default to true and let the DAGCombiner decide.
19752 return true;
19753}
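// Illustrative sketch (annotation, not part of the upstream file): the hook
// keeps DAGCombine from rewriting (x + c0) * c1 into x * c1 + c0 * c1 when
// c0 fits an add/sub immediate but the product c0 * c1 would need a
// multi-instruction sequence (e.g. a movw/movt pair) to materialise, since
// that trade would add an instruction rather than save one.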
19754
19755static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19756 bool isSEXTLoad, SDValue &Base,
19757 SDValue &Offset, bool &isInc,
19758 SelectionDAG &DAG) {
19759 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19760 return false;
19761
19762 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19763 // AddressingMode 3
19764 Base = Ptr->getOperand(0);
19765 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19766 int RHSC = (int)RHS->getZExtValue();
19767 if (RHSC < 0 && RHSC > -256) {
19768 assert(Ptr->getOpcode() == ISD::ADD);
19769 isInc = false;
19770 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19771 return true;
19772 }
19773 }
19774 isInc = (Ptr->getOpcode() == ISD::ADD);
19775 Offset = Ptr->getOperand(1);
19776 return true;
19777 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19778 // AddressingMode 2
19779 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19780 int RHSC = (int)RHS->getZExtValue();
19781 if (RHSC < 0 && RHSC > -0x1000) {
19782 assert(Ptr->getOpcode() == ISD::ADD);
19783 isInc = false;
19784 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19785 Base = Ptr->getOperand(0);
19786 return true;
19787 }
19788 }
19789
19790 if (Ptr->getOpcode() == ISD::ADD) {
19791 isInc = true;
19792 ARM_AM::ShiftOpc ShOpcVal=
19793 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19794 if (ShOpcVal != ARM_AM::no_shift) {
19795 Base = Ptr->getOperand(1);
19796 Offset = Ptr->getOperand(0);
19797 } else {
19798 Base = Ptr->getOperand(0);
19799 Offset = Ptr->getOperand(1);
19800 }
19801 return true;
19802 }
19803
19804 isInc = (Ptr->getOpcode() == ISD::ADD);
19805 Base = Ptr->getOperand(0);
19806 Offset = Ptr->getOperand(1);
19807 return true;
19808 }
19809
19810 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19811 return false;
19812}
19813
19814static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19815 bool isSEXTLoad, SDValue &Base,
19816 SDValue &Offset, bool &isInc,
19817 SelectionDAG &DAG) {
19818 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19819 return false;
19820
19821 Base = Ptr->getOperand(0);
19822 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19823 int RHSC = (int)RHS->getZExtValue();
19824 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19825 assert(Ptr->getOpcode() == ISD::ADD);
19826 isInc = false;
19827 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19828 return true;
19829 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19830 isInc = Ptr->getOpcode() == ISD::ADD;
19831 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19832 return true;
19833 }
19834 }
19835
19836 return false;
19837}
19838
19839static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19840 bool isSEXTLoad, bool IsMasked, bool isLE,
19841 SDValue &Base, SDValue &Offset,
19842 bool &isInc, SelectionDAG &DAG) {
19843 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19844 return false;
19845 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19846 return false;
19847
19848 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19849 // as opposed to a vldrw.32). This can allow extra addressing modes or
19850 // alignments for what is otherwise an equivalent instruction.
19851 bool CanChangeType = isLE && !IsMasked;
19852
19853 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19854 int RHSC = (int)RHS->getZExtValue();
19855
19856 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19857 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19858 assert(Ptr->getOpcode() == ISD::ADD);
19859 isInc = false;
19860 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19861 return true;
19862 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19863 isInc = Ptr->getOpcode() == ISD::ADD;
19864 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19865 return true;
19866 }
19867 return false;
19868 };
19869
19870 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19871 // (in BE/masked) type.
19872 Base = Ptr->getOperand(0);
19873 if (VT == MVT::v4i16) {
19874 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19875 return true;
19876 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19877 if (IsInRange(RHSC, 0x80, 1))
19878 return true;
19879 } else if (Alignment >= 4 &&
19880 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19881 IsInRange(RHSC, 0x80, 4))
19882 return true;
19883 else if (Alignment >= 2 &&
19884 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19885 IsInRange(RHSC, 0x80, 2))
19886 return true;
19887 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19888 return true;
19889 return false;
19890}
19891
19892/// getPreIndexedAddressParts - returns true by value, base pointer and
19893/// offset pointer and addressing mode by reference if the node's address
19894/// can be legally represented as pre-indexed load / store address.
19895bool
19896ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19897 SDValue &Offset,
19898 ISD::MemIndexedMode &AM,
19899 SelectionDAG &DAG) const {
19900 if (Subtarget->isThumb1Only())
19901 return false;
19902
19903 EVT VT;
19904 SDValue Ptr;
19905 Align Alignment;
19906 bool isSEXTLoad = false;
19907 bool IsMasked = false;
19908 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19909 Ptr = LD->getBasePtr();
19910 VT = LD->getMemoryVT();
19911 Alignment = LD->getAlign();
19912 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19913 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19914 Ptr = ST->getBasePtr();
19915 VT = ST->getMemoryVT();
19916 Alignment = ST->getAlign();
19917 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19918 Ptr = LD->getBasePtr();
19919 VT = LD->getMemoryVT();
19920 Alignment = LD->getAlign();
19921 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19922 IsMasked = true;
19923 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19924 Ptr = ST->getBasePtr();
19925 VT = ST->getMemoryVT();
19926 Alignment = ST->getAlign();
19927 IsMasked = true;
19928 } else
19929 return false;
19930
19931 bool isInc;
19932 bool isLegal = false;
19933 if (VT.isVector())
19934 isLegal = Subtarget->hasMVEIntegerOps() &&
19935 getMVEIndexedAddressParts(
19936 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19937 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19938 else {
19939 if (Subtarget->isThumb2())
19940 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19941 Offset, isInc, DAG);
19942 else
19943 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19944 Offset, isInc, DAG);
19945 }
19946 if (!isLegal)
19947 return false;
19948
19949 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19950 return true;
19951}
19952
19953/// getPostIndexedAddressParts - returns true by value, base pointer and
19954/// offset pointer and addressing mode by reference if this node can be
19955/// combined with a load / store to form a post-indexed load / store.
19956bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19957 SDValue &Base,
19958 SDValue &Offset,
19959 ISD::MemIndexedMode &AM,
19960 SelectionDAG &DAG) const {
19961 EVT VT;
19962 SDValue Ptr;
19963 Align Alignment;
19964 bool isSEXTLoad = false, isNonExt;
19965 bool IsMasked = false;
19966 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19967 VT = LD->getMemoryVT();
19968 Ptr = LD->getBasePtr();
19969 Alignment = LD->getAlign();
19970 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19971 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19972 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19973 VT = ST->getMemoryVT();
19974 Ptr = ST->getBasePtr();
19975 Alignment = ST->getAlign();
19976 isNonExt = !ST->isTruncatingStore();
19977 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19978 VT = LD->getMemoryVT();
19979 Ptr = LD->getBasePtr();
19980 Alignment = LD->getAlign();
19981 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19982 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19983 IsMasked = true;
19984 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19985 VT = ST->getMemoryVT();
19986 Ptr = ST->getBasePtr();
19987 Alignment = ST->getAlign();
19988 isNonExt = !ST->isTruncatingStore();
19989 IsMasked = true;
19990 } else
19991 return false;
19992
19993 if (Subtarget->isThumb1Only()) {
19994 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19995 // must be non-extending/truncating, i32, with an offset of 4.
19996 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19997 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19998 return false;
19999 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20000 if (!RHS || RHS->getZExtValue() != 4)
20001 return false;
20002 if (Alignment < Align(4))
20003 return false;
20004
20005 Offset = Op->getOperand(1);
20006 Base = Op->getOperand(0);
20007 AM = ISD::POST_INC;
20008 return true;
20009 }
20010
20011 bool isInc;
20012 bool isLegal = false;
20013 if (VT.isVector())
20014 isLegal = Subtarget->hasMVEIntegerOps() &&
20015 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
20016 Subtarget->isLittle(), Base, Offset,
20017 isInc, DAG);
20018 else {
20019 if (Subtarget->isThumb2())
20020 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20021 isInc, DAG);
20022 else
20023 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20024 isInc, DAG);
20025 }
20026 if (!isLegal)
20027 return false;
20028
20029 if (Ptr != Base) {
20030 // Swap base ptr and offset to catch more post-index load / store when
20031 // it's legal. In Thumb2 mode, offset must be an immediate.
20032 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20033 !Subtarget->isThumb2())
20034 std::swap(Base, Offset);
20035
20036 // Post-indexed load / store update the base pointer.
20037 if (Ptr != Base)
20038 return false;
20039 }
20040
20041 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
20042 return true;
20043}
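// Illustrative example (annotation, not part of the upstream file): a load
// followed by an increment of the same pointer, e.g.
//   %v = load i32, ptr %p
//   %p.next = getelementptr i8, ptr %p, i32 4
// can be selected as a single post-indexed access, `ldr r0, [r1], #4`,
// which produces the loaded value and the updated base in one instruction.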
20044
20045void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20046 KnownBits &Known,
20047 const APInt &DemandedElts,
20048 const SelectionDAG &DAG,
20049 unsigned Depth) const {
20050 unsigned BitWidth = Known.getBitWidth();
20051 Known.resetAll();
20052 switch (Op.getOpcode()) {
20053 default: break;
20054 case ARMISD::ADDC:
20055 case ARMISD::ADDE:
20056 case ARMISD::SUBC:
20057 case ARMISD::SUBE:
20058 // Special cases when we convert a carry to a boolean.
20059 if (Op.getResNo() == 0) {
20060 SDValue LHS = Op.getOperand(0);
20061 SDValue RHS = Op.getOperand(1);
20062 // (ADDE 0, 0, C) will give us a single bit.
20063 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20064 isNullConstant(RHS)) {
20065 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20066 return;
20067 }
20068 }
20069 break;
20070 case ARMISD::CMOV: {
20071 // Bits are known zero/one if known on the LHS and RHS.
20072 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20073 if (Known.isUnknown())
20074 return;
20075
20076 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20077 Known = Known.intersectWith(KnownRHS);
20078 return;
20079 }
20080 case ISD::INTRINSIC_W_CHAIN: {
20081 Intrinsic::ID IntID =
20082 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20083 switch (IntID) {
20084 default: return;
20085 case Intrinsic::arm_ldaex:
20086 case Intrinsic::arm_ldrex: {
20087 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20088 unsigned MemBits = VT.getScalarSizeInBits();
20089 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20090 return;
20091 }
20092 }
20093 }
20094 case ARMISD::BFI: {
20095 // Conservatively, we can recurse down the first operand
20096 // and just mask out all affected bits.
20097 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20098
20099 // The operand to BFI is already a mask suitable for removing the bits it
20100 // sets.
20101 const APInt &Mask = Op.getConstantOperandAPInt(2);
20102 Known.Zero &= Mask;
20103 Known.One &= Mask;
20104 return;
20105 }
20106 case ARMISD::VGETLANEs:
20107 case ARMISD::VGETLANEu: {
20108 const SDValue &SrcSV = Op.getOperand(0);
20109 EVT VecVT = SrcSV.getValueType();
20110 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20111 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20112 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20113 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20114 "VGETLANE index out of bounds");
20115 unsigned Idx = Pos->getZExtValue();
20116 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20117 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20118
20119 EVT VT = Op.getValueType();
20120 const unsigned DstSz = VT.getScalarSizeInBits();
20121 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20122 (void)SrcSz;
20123 assert(SrcSz == Known.getBitWidth());
20124 assert(DstSz > SrcSz);
20125 if (Op.getOpcode() == ARMISD::VGETLANEs)
20126 Known = Known.sext(DstSz);
20127 else {
20128 Known = Known.zext(DstSz);
20129 }
20130 assert(DstSz == Known.getBitWidth());
20131 break;
20132 }
20133 case ARMISD::VMOVrh: {
20134 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20135 assert(KnownOp.getBitWidth() == 16);
20136 Known = KnownOp.zext(32);
20137 break;
20138 }
20139 case ARMISD::CSINC:
20140 case ARMISD::CSINV:
20141 case ARMISD::CSNEG: {
20142 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20143 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20144
20145 // The result is either:
20146 // CSINC: KnownOp0 or KnownOp1 + 1
20147 // CSINV: KnownOp0 or ~KnownOp1
20148 // CSNEG: KnownOp0 or KnownOp1 * -1
20149 if (Op.getOpcode() == ARMISD::CSINC)
20150 KnownOp1 = KnownBits::computeForAddSub(
20151 /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownOp1,
20152 KnownBits::makeConstant(APInt(32, 1)));
20153 else if (Op.getOpcode() == ARMISD::CSINV)
20154 std::swap(KnownOp1.Zero, KnownOp1.One);
20155 else if (Op.getOpcode() == ARMISD::CSNEG)
20156 KnownOp1 = KnownBits::mul(
20157 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20158
20159 Known = KnownOp0.intersectWith(KnownOp1);
20160 break;
20161 }
20162 }
20163}
20164
20165bool ARMTargetLowering::targetShrinkDemandedConstant(
20166 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20167 TargetLoweringOpt &TLO) const {
20168 // Delay optimization, so we don't have to deal with illegal types, or block
20169 // optimizations.
20170 if (!TLO.LegalOps)
20171 return false;
20172
20173 // Only optimize AND for now.
20174 if (Op.getOpcode() != ISD::AND)
20175 return false;
20176
20177 EVT VT = Op.getValueType();
20178
20179 // Ignore vectors.
20180 if (VT.isVector())
20181 return false;
20182
20183 assert(VT == MVT::i32 && "Unexpected integer type");
20184
20185 // Make sure the RHS really is a constant.
20186 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20187 if (!C)
20188 return false;
20189
20190 unsigned Mask = C->getZExtValue();
20191
20192 unsigned Demanded = DemandedBits.getZExtValue();
20193 unsigned ShrunkMask = Mask & Demanded;
20194 unsigned ExpandedMask = Mask | ~Demanded;
20195
20196 // If the mask is all zeros, let the target-independent code replace the
20197 // result with zero.
20198 if (ShrunkMask == 0)
20199 return false;
20200
20201 // If the mask is all ones, erase the AND. (Currently, the target-independent
20202 // code won't do this, so we have to do it explicitly to avoid an infinite
20203 // loop in obscure cases.)
20204 if (ExpandedMask == ~0U)
20205 return TLO.CombineTo(Op, Op.getOperand(0));
20206
20207 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20208 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20209 };
20210 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20211 if (NewMask == Mask)
20212 return true;
20213 SDLoc DL(Op);
20214 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20215 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20216 return TLO.CombineTo(Op, NewOp);
20217 };
20218
20219 // Prefer uxtb mask.
20220 if (IsLegalMask(0xFF))
20221 return UseMask(0xFF);
20222
20223 // Prefer uxth mask.
20224 if (IsLegalMask(0xFFFF))
20225 return UseMask(0xFFFF);
20226
20227 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20228 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20229 if (ShrunkMask < 256)
20230 return UseMask(ShrunkMask);
20231
20232 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20233 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20234 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20235 return UseMask(ExpandedMask);
20236
20237 // Potential improvements:
20238 //
20239 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20240 // We could try to prefer Thumb1 immediates which can be lowered to a
20241 // two-instruction sequence.
20242 // We could try to recognize more legal ARM/Thumb2 immediates here.
20243
20244 return false;
20245}
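// Worked example (annotation, not part of the upstream file): if only the
// low 16 bits of (X & 0x00FF00FF) are demanded, the constant is shrunk to
// 0xFF, because 0xFF keeps exactly the demanded bits the original mask kept
// and clears the demanded bits it cleared; the resulting (X & 0xFF) can then
// be emitted as a single `uxtb` instead of materialising 0x00FF00FF.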
20246
20247bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20248 SDValue Op, const APInt &OriginalDemandedBits,
20249 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20250 unsigned Depth) const {
20251 unsigned Opc = Op.getOpcode();
20252
20253 switch (Opc) {
20254 case ARMISD::ASRL:
20255 case ARMISD::LSRL: {
20256 // If this is result 0 and the other result is unused, see if the demand
20257 // bits allow us to shrink this long shift into a standard small shift in
20258 // the opposite direction.
20259 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20260 isa<ConstantSDNode>(Op->getOperand(2))) {
20261 unsigned ShAmt = Op->getConstantOperandVal(2);
20262 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20263 << (32 - ShAmt)))
20264 return TLO.CombineTo(
20265 Op, TLO.DAG.getNode(
20266 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20267 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20268 }
20269 break;
20270 }
20271 case ARMISD::VBICIMM: {
20272 SDValue Op0 = Op.getOperand(0);
20273 unsigned ModImm = Op.getConstantOperandVal(1);
20274 unsigned EltBits = 0;
20275 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20276 if ((OriginalDemandedBits & Mask) == 0)
20277 return TLO.CombineTo(Op, Op0);
20278 }
20279 }
20280
20281 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20282 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20283}
20284
20285//===----------------------------------------------------------------------===//
20286// ARM Inline Assembly Support
20287//===----------------------------------------------------------------------===//
20288
20289bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20290 // Looking for "rev" which is V6+.
20291 if (!Subtarget->hasV6Ops())
20292 return false;
20293
20294 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20295 StringRef AsmStr = IA->getAsmString();
20296 SmallVector<StringRef, 4> AsmPieces;
20297 SplitString(AsmStr, AsmPieces, ";\n");
20298
20299 switch (AsmPieces.size()) {
20300 default: return false;
20301 case 1:
20302 AsmStr = AsmPieces[0];
20303 AsmPieces.clear();
20304 SplitString(AsmStr, AsmPieces, " \t,");
20305
20306 // rev $0, $1
20307 if (AsmPieces.size() == 3 &&
20308 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20309 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20310 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20311 if (Ty && Ty->getBitWidth() == 32)
20312 return IntrinsicLowering::LowerToByteSwap(CI);
20313 }
20314 break;
20315 }
20316
20317 return false;
20318}
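// Illustrative example (annotation, not part of the upstream file): on an
// ARMv6+ target, inline assembly of the form
//   asm("rev $0, $1" : "=l"(out) : "l"(in));
// with a 32-bit integer type is rewritten into a call to llvm.bswap.i32,
// letting the optimizer treat the byte swap like any other intrinsic.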
20319
20320const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20321 // At this point, we have to lower this constraint to something else, so we
20322 // lower it to an "r" or "w". However, by doing this we will force the result
20323 // to be in register, while the X constraint is much more permissive.
20324 //
20325 // Although we are correct (we are free to emit anything, without
20326 // constraints), we might break use cases that would expect us to be more
20327 // efficient and emit something else.
20328 if (!Subtarget->hasVFP2Base())
20329 return "r";
20330 if (ConstraintVT.isFloatingPoint())
20331 return "w";
20332 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20333 (ConstraintVT.getSizeInBits() == 64 ||
20334 ConstraintVT.getSizeInBits() == 128))
20335 return "w";
20336
20337 return "r";
20338}
20339
20340/// getConstraintType - Given a constraint letter, return the type of
20341/// constraint it is for this target.
20342ARMTargetLowering::ConstraintType
20343ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20344 unsigned S = Constraint.size();
20345 if (S == 1) {
20346 switch (Constraint[0]) {
20347 default: break;
20348 case 'l': return C_RegisterClass;
20349 case 'w': return C_RegisterClass;
20350 case 'h': return C_RegisterClass;
20351 case 'x': return C_RegisterClass;
20352 case 't': return C_RegisterClass;
20353 case 'j': return C_Immediate; // Constant for movw.
20354 // An address with a single base register. Due to the way we
20355 // currently handle addresses it is the same as an 'r' memory constraint.
20356 case 'Q': return C_Memory;
20357 }
20358 } else if (S == 2) {
20359 switch (Constraint[0]) {
20360 default: break;
20361 case 'T': return C_RegisterClass;
20362 // All 'U+' constraints are addresses.
20363 case 'U': return C_Memory;
20364 }
20365 }
20366 return TargetLowering::getConstraintType(Constraint);
20367}
20368
20369/// Examine constraint type and operand type and determine a weight value.
20370/// This object must already have been set up with the operand type
20371/// and the current alternative constraint selected.
20372TargetLowering::ConstraintWeight
20373ARMTargetLowering::getSingleConstraintMatchWeight(
20374 AsmOperandInfo &info, const char *constraint) const {
20375 ConstraintWeight weight = CW_Invalid;
20376 Value *CallOperandVal = info.CallOperandVal;
20377 // If we don't have a value, we can't do a match,
20378 // but allow it at the lowest weight.
20379 if (!CallOperandVal)
20380 return CW_Default;
20381 Type *type = CallOperandVal->getType();
20382 // Look at the constraint type.
20383 switch (*constraint) {
20384 default:
20385 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20386 break;
20387 case 'l':
20388 if (type->isIntegerTy()) {
20389 if (Subtarget->isThumb())
20390 weight = CW_SpecificReg;
20391 else
20392 weight = CW_Register;
20393 }
20394 break;
20395 case 'w':
20396 if (type->isFloatingPointTy())
20397 weight = CW_Register;
20398 break;
20399 }
20400 return weight;
20401}
20402
20403using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20404
20405RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20406 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20407 switch (Constraint.size()) {
20408 case 1:
20409 // GCC ARM Constraint Letters
20410 switch (Constraint[0]) {
20411 case 'l': // Low regs or general regs.
20412 if (Subtarget->isThumb())
20413 return RCPair(0U, &ARM::tGPRRegClass);
20414 return RCPair(0U, &ARM::GPRRegClass);
20415 case 'h': // High regs or no regs.
20416 if (Subtarget->isThumb())
20417 return RCPair(0U, &ARM::hGPRRegClass);
20418 break;
20419 case 'r':
20420 if (Subtarget->isThumb1Only())
20421 return RCPair(0U, &ARM::tGPRRegClass);
20422 return RCPair(0U, &ARM::GPRRegClass);
20423 case 'w':
20424 if (VT == MVT::Other)
20425 break;
20426 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20427 return RCPair(0U, &ARM::SPRRegClass);
20428 if (VT.getSizeInBits() == 64)
20429 return RCPair(0U, &ARM::DPRRegClass);
20430 if (VT.getSizeInBits() == 128)
20431 return RCPair(0U, &ARM::QPRRegClass);
20432 break;
20433 case 'x':
20434 if (VT == MVT::Other)
20435 break;
20436 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20437 return RCPair(0U, &ARM::SPR_8RegClass);
20438 if (VT.getSizeInBits() == 64)
20439 return RCPair(0U, &ARM::DPR_8RegClass);
20440 if (VT.getSizeInBits() == 128)
20441 return RCPair(0U, &ARM::QPR_8RegClass);
20442 break;
20443 case 't':
20444 if (VT == MVT::Other)
20445 break;
20446 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20447 return RCPair(0U, &ARM::SPRRegClass);
20448 if (VT.getSizeInBits() == 64)
20449 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20450 if (VT.getSizeInBits() == 128)
20451 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20452 break;
20453 }
20454 break;
20455
20456 case 2:
20457 if (Constraint[0] == 'T') {
20458 switch (Constraint[1]) {
20459 default:
20460 break;
20461 case 'e':
20462 return RCPair(0U, &ARM::tGPREvenRegClass);
20463 case 'o':
20464 return RCPair(0U, &ARM::tGPROddRegClass);
20465 }
20466 }
20467 break;
20468
20469 default:
20470 break;
20471 }
20472
20473 if (StringRef("{cc}").equals_insensitive(Constraint))
20474 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20475
20476 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20477}
20478
20479/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20480/// vector. If it is invalid, don't add anything to Ops.
20481void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20482 StringRef Constraint,
20483 std::vector<SDValue> &Ops,
20484 SelectionDAG &DAG) const {
20485 SDValue Result;
20486
20487 // Currently only support length 1 constraints.
20488 if (Constraint.size() != 1)
20489 return;
20490
20491 char ConstraintLetter = Constraint[0];
20492 switch (ConstraintLetter) {
20493 default: break;
20494 case 'j':
20495 case 'I': case 'J': case 'K': case 'L':
20496 case 'M': case 'N': case 'O':
20497 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20498 if (!C)
20499 return;
20500
20501 int64_t CVal64 = C->getSExtValue();
20502 int CVal = (int) CVal64;
20503 // None of these constraints allow values larger than 32 bits. Check
20504 // that the value fits in an int.
20505 if (CVal != CVal64)
20506 return;
20507
20508 switch (ConstraintLetter) {
20509 case 'j':
20510 // Constant suitable for movw, must be between 0 and
20511 // 65535.
20512 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20513 if (CVal >= 0 && CVal <= 65535)
20514 break;
20515 return;
20516 case 'I':
20517 if (Subtarget->isThumb1Only()) {
20518 // This must be a constant between 0 and 255, for ADD
20519 // immediates.
20520 if (CVal >= 0 && CVal <= 255)
20521 break;
20522 } else if (Subtarget->isThumb2()) {
20523 // A constant that can be used as an immediate value in a
20524 // data-processing instruction.
20525 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20526 break;
20527 } else {
20528 // A constant that can be used as an immediate value in a
20529 // data-processing instruction.
20530 if (ARM_AM::getSOImmVal(CVal) != -1)
20531 break;
20532 }
20533 return;
20534
20535 case 'J':
20536 if (Subtarget->isThumb1Only()) {
20537 // This must be a constant between -255 and -1, for negated ADD
20538 // immediates. This can be used in GCC with an "n" modifier that
20539 // prints the negated value, for use with SUB instructions. It is
20540 // not useful otherwise but is implemented for compatibility.
20541 if (CVal >= -255 && CVal <= -1)
20542 break;
20543 } else {
20544 // This must be a constant between -4095 and 4095. It is not clear
20545 // what this constraint is intended for. Implemented for
20546 // compatibility with GCC.
20547 if (CVal >= -4095 && CVal <= 4095)
20548 break;
20549 }
20550 return;
20551
20552 case 'K':
20553 if (Subtarget->isThumb1Only()) {
20554 // A 32-bit value where only one byte has a nonzero value. Exclude
20555 // zero to match GCC. This constraint is used by GCC internally for
20556 // constants that can be loaded with a move/shift combination.
20557 // It is not useful otherwise but is implemented for compatibility.
20558 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20559 break;
20560 } else if (Subtarget->isThumb2()) {
20561 // A constant whose bitwise inverse can be used as an immediate
20562 // value in a data-processing instruction. This can be used in GCC
20563 // with a "B" modifier that prints the inverted value, for use with
20564 // BIC and MVN instructions. It is not useful otherwise but is
20565 // implemented for compatibility.
20566 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20567 break;
20568 } else {
20569 // A constant whose bitwise inverse can be used as an immediate
20570 // value in a data-processing instruction. This can be used in GCC
20571 // with a "B" modifier that prints the inverted value, for use with
20572 // BIC and MVN instructions. It is not useful otherwise but is
20573 // implemented for compatibility.
20574 if (ARM_AM::getSOImmVal(~CVal) != -1)
20575 break;
20576 }
20577 return;
20578
20579 case 'L':
20580 if (Subtarget->isThumb1Only()) {
20581 // This must be a constant between -7 and 7,
20582 // for 3-operand ADD/SUB immediate instructions.
20583 if (CVal >= -7 && CVal < 7)
20584 break;
20585 } else if (Subtarget->isThumb2()) {
20586 // A constant whose negation can be used as an immediate value in a
20587 // data-processing instruction. This can be used in GCC with an "n"
20588 // modifier that prints the negated value, for use with SUB
20589 // instructions. It is not useful otherwise but is implemented for
20590 // compatibility.
20591 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20592 break;
20593 } else {
20594 // A constant whose negation can be used as an immediate value in a
20595 // data-processing instruction. This can be used in GCC with an "n"
20596 // modifier that prints the negated value, for use with SUB
20597 // instructions. It is not useful otherwise but is implemented for
20598 // compatibility.
20599 if (ARM_AM::getSOImmVal(-CVal) != -1)
20600 break;
20601 }
20602 return;
20603
20604 case 'M':
20605 if (Subtarget->isThumb1Only()) {
20606 // This must be a multiple of 4 between 0 and 1020, for
20607 // ADD sp + immediate.
20608 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20609 break;
20610 } else {
20611 // A power of two or a constant between 0 and 32. This is used in
20612 // GCC for the shift amount on shifted register operands, but it is
20613 // useful in general for any shift amounts.
20614 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20615 break;
20616 }
20617 return;
20618
20619 case 'N':
20620 if (Subtarget->isThumb1Only()) {
20621 // This must be a constant between 0 and 31, for shift amounts.
20622 if (CVal >= 0 && CVal <= 31)
20623 break;
20624 }
20625 return;
20626
20627 case 'O':
20628 if (Subtarget->isThumb1Only()) {
20629 // This must be a multiple of 4 between -508 and 508, for
20630 // ADD/SUB sp = sp + immediate.
20631 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20632 break;
20633 }
20634 return;
20635 }
20636 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20637 break;
20638 }
20639
20640 if (Result.getNode()) {
20641 Ops.push_back(Result);
20642 return;
20643 }
20644 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20645}
20646
20647static RTLIB::Libcall getDivRemLibcall(
20648 const SDNode *N, MVT::SimpleValueType SVT) {
20649 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20650 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20651 "Unhandled Opcode in getDivRemLibcall");
20652 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20653 N->getOpcode() == ISD::SREM;
20654 RTLIB::Libcall LC;
20655 switch (SVT) {
20656 default: llvm_unreachable("Unexpected request for libcall!");
20657 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20658 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20659 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20660 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20661 }
20662 return LC;
20663}
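// Illustrative mapping (annotation, not part of the upstream file): a signed
// i32 SDIVREM selects RTLIB::SDIVREM_I32, which on AEABI targets typically
// resolves to the __aeabi_idivmod runtime routine (the unsigned variant maps
// to __aeabi_uidivmod), returning quotient and remainder together.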
20664
20665static TargetLowering::ArgListTy getDivRemArgList(
20666 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20667 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20668 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20669 "Unhandled Opcode in getDivRemArgList");
20670 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20671 N->getOpcode() == ISD::SREM;
20672 TargetLowering::ArgListTy Args;
20673 TargetLowering::ArgListEntry Entry;
20674 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20675 EVT ArgVT = N->getOperand(i).getValueType();
20676 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20677 Entry.Node = N->getOperand(i);
20678 Entry.Ty = ArgTy;
20679 Entry.IsSExt = isSigned;
20680 Entry.IsZExt = !isSigned;
20681 Args.push_back(Entry);
20682 }
20683 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20684 std::swap(Args[0], Args[1]);
20685 return Args;
20686}
20687
20688SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20689 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20690 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20691 Subtarget->isTargetWindows()) &&
20692 "Register-based DivRem lowering only");
20693 unsigned Opcode = Op->getOpcode();
20694 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20695 "Invalid opcode for Div/Rem lowering");
20696 bool isSigned = (Opcode == ISD::SDIVREM);
20697 EVT VT = Op->getValueType(0);
20698 SDLoc dl(Op);
20699
20700 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20701 SmallVector<SDValue> Result;
20702 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20703 SDValue Res0 =
20704 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20705 SDValue Res1 =
20706 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20707 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20708 {Res0, Res1});
20709 }
20710 }
20711
20712 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20713
20714 // If the target has hardware divide, use divide + multiply + subtract:
20715 // div = a / b
20716 // rem = a - b * div
20717 // return {div, rem}
20718 // This should be lowered into UDIV/SDIV + MLS later on.
20719 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20720 : Subtarget->hasDivideInARMMode();
20721 if (hasDivide && Op->getValueType(0).isSimple() &&
20722 Op->getSimpleValueType(0) == MVT::i32) {
20723 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20724 const SDValue Dividend = Op->getOperand(0);
20725 const SDValue Divisor = Op->getOperand(1);
20726 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20727 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20728 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20729
20730 SDValue Values[2] = {Div, Rem};
20731 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20732 }
20733
20734 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20735 VT.getSimpleVT().SimpleTy);
20736 SDValue InChain = DAG.getEntryNode();
20737
20738 ArgListTy Args = getDivRemArgList(Op.getNode(),
20739 DAG.getContext(),
20740 Subtarget);
20741
20744
20745 Type *RetTy = StructType::get(Ty, Ty);
20746
20747 if (Subtarget->isTargetWindows())
20748 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20749
20750 CallLoweringInfo CLI(DAG);
20751 CLI.setDebugLoc(dl).setChain(InChain)
20752 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20754
20755 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20756 return CallInfo.first;
20757}
20758
20759// Lowers REM using divmod helpers
20760// see RTABI section 4.2/4.3
20761SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20762 EVT VT = N->getValueType(0);
20763
20764 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20765 SmallVector<SDValue> Result;
20766 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20767 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20768 Result[0], Result[1]);
20769 }
20770
20771 // Build return types (div and rem)
20772 std::vector<Type*> RetTyParams;
20773 Type *RetTyElement;
20774
20775 switch (VT.getSimpleVT().SimpleTy) {
20776 default: llvm_unreachable("Unexpected request for libcall!");
20777 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20778 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20779 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20780 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20781 }
20782
20783 RetTyParams.push_back(RetTyElement);
20784 RetTyParams.push_back(RetTyElement);
20785 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20786 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20787
20788 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20789 SimpleTy);
20790 SDValue InChain = DAG.getEntryNode();
20791 ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20792 Subtarget);
20793 bool isSigned = N->getOpcode() == ISD::SREM;
20794 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20795 getPointerTy(DAG.getDataLayout()));
20796
20797 if (Subtarget->isTargetWindows())
20798 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20799
20800 // Lower call
20801 CallLoweringInfo CLI(DAG);
20802 CLI.setChain(InChain)
20803 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20805 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20806
20807 // Return second (rem) result operand (first contains div)
20808 SDNode *ResNode = CallResult.first.getNode();
20809 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20810 return ResNode->getOperand(1);
20811}
20812
20813SDValue
20814ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20815 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20816 SDLoc DL(Op);
20817
20818 // Get the inputs.
20819 SDValue Chain = Op.getOperand(0);
20820 SDValue Size = Op.getOperand(1);
20821
20822 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20823 "no-stack-arg-probe")) {
20824 MaybeAlign Align =
20825 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20826 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20827 Chain = SP.getValue(1);
20828 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20829 if (Align)
20830 SP =
20831 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20832 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20833 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20834 SDValue Ops[2] = { SP, Chain };
20835 return DAG.getMergeValues(Ops, DL);
20836 }
20837
20838 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20839 DAG.getConstant(2, DL, MVT::i32));
20840
20841 SDValue Glue;
20842 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20843 Glue = Chain.getValue(1);
20844
20845 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20846 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20847
20848 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20849 Chain = NewSP.getValue(1);
20850
20851 SDValue Ops[2] = { NewSP, Chain };
20852 return DAG.getMergeValues(Ops, DL);
20853}
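// Illustrative sketch (annotation, not part of the upstream file): on
// Windows targets the allocation size is converted to a count of 4-byte
// words, moved into R4 and passed to the ARMISD::WIN__CHKSTK node, which is
// later expanded into a call to the __chkstk stack-probe helper; the new
// stack pointer is then read back and returned together with the chain.
// Functions marked "no-stack-arg-probe" skip the probe and adjust SP inline.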
20854
20855SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20856 bool IsStrict = Op->isStrictFPOpcode();
20857 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20858 const unsigned DstSz = Op.getValueType().getSizeInBits();
20859 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20860 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20861 "Unexpected type for custom-lowering FP_EXTEND");
20862
20863 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20864 "With both FP DP and 16, any FP conversion is legal!");
20865
20866 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20867 "With FP16, 16 to 32 conversion is legal!");
20868
20869 // Converting from 32 -> 64 is valid if we have FP64.
20870 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20871 // FIXME: Remove this when we have strict fp instruction selection patterns
20872 if (IsStrict) {
20873 SDLoc Loc(Op);
20874 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20875 Loc, Op.getValueType(), SrcVal);
20876 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20877 }
20878 return Op;
20879 }
20880
20881 // Either we are converting from 16 -> 64, without FP16 and/or
20882 // FP.double-precision or without Armv8-fp. So we must do it in two
20883 // steps.
20884 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20885 // without FP16. So we must do a function call.
20886 SDLoc Loc(Op);
20887 RTLIB::Libcall LC;
20888 MakeLibCallOptions CallOptions;
20889 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20890 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20891 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20892 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20893 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20894 if (Supported) {
20895 if (IsStrict) {
20896 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20897 {DstVT, MVT::Other}, {Chain, SrcVal});
20898 Chain = SrcVal.getValue(1);
20899 } else {
20900 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20901 }
20902 } else {
20903 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20904 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20905 "Unexpected type for custom-lowering FP_EXTEND");
20906 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20907 Loc, Chain);
20908 }
20909 }
20910
20911 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20912}
20913
20914SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20915 bool IsStrict = Op->isStrictFPOpcode();
20916
20917 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20918 EVT SrcVT = SrcVal.getValueType();
20919 EVT DstVT = Op.getValueType();
20920 const unsigned DstSz = Op.getValueType().getSizeInBits();
20921 const unsigned SrcSz = SrcVT.getSizeInBits();
20922 (void)DstSz;
20923 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20924 "Unexpected type for custom-lowering FP_ROUND");
20925
20926 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20927 "With both FP DP and 16, any FP conversion is legal!");
20928
20929 SDLoc Loc(Op);
20930
20931 // Instruction from 32 -> 16 if hasFP16 is valid
20932 if (SrcSz == 32 && Subtarget->hasFP16())
20933 return Op;
20934
20935 // Lib call from 32 -> 16 / 64 -> [32, 16]
20936 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20937 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20938 "Unexpected type for custom-lowering FP_ROUND");
20939 MakeLibCallOptions CallOptions;
20940 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20941 SDValue Result;
20942 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20943 Loc, Chain);
20944 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20945}
20946
20947bool
20948ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20949 // The ARM target isn't yet aware of offsets.
20950 return false;
20951}
20952
20953bool ARM::isBitFieldInvertedMask(unsigned v) {
20954 if (v == 0xffffffff)
20955 return false;
20956
20957 // there can be 1's on either or both "outsides", all the "inside"
20958 // bits must be 0's
20959 return isShiftedMask_32(~v);
20960}
20961
20962/// isFPImmLegal - Returns true if the target can instruction select the
20963/// specified FP immediate natively. If false, the legalizer will
20964/// materialize the FP immediate as a load from a constant pool.
20965bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20966 bool ForCodeSize) const {
20967 if (!Subtarget->hasVFP3Base())
20968 return false;
20969 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20970 return ARM_AM::getFP16Imm(Imm) != -1;
20971 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20972 ARM_AM::getFP32FP16Imm(Imm) != -1)
20973 return true;
20974 if (VT == MVT::f32)
20975 return ARM_AM::getFP32Imm(Imm) != -1;
20976 if (VT == MVT::f64 && Subtarget->hasFP64())
20977 return ARM_AM::getFP64Imm(Imm) != -1;
20978 return false;
20979}
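// Illustrative examples (annotation, not part of the upstream file): with a
// VFPv3 base, constants such as 1.0f, 0.5f or -2.0f fit the 8-bit VFP
// immediate encoding and can be materialised with `vmov.f32 sN, #imm`,
// whereas a value like 0.1f has no such encoding and is loaded from the
// constant pool instead.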
20980
20981/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20982/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20983/// specified in the intrinsic calls.
20984bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20985 const CallInst &I,
20986 MachineFunction &MF,
20987 unsigned Intrinsic) const {
20988 switch (Intrinsic) {
20989 case Intrinsic::arm_neon_vld1:
20990 case Intrinsic::arm_neon_vld2:
20991 case Intrinsic::arm_neon_vld3:
20992 case Intrinsic::arm_neon_vld4:
20993 case Intrinsic::arm_neon_vld2lane:
20994 case Intrinsic::arm_neon_vld3lane:
20995 case Intrinsic::arm_neon_vld4lane:
20996 case Intrinsic::arm_neon_vld2dup:
20997 case Intrinsic::arm_neon_vld3dup:
20998 case Intrinsic::arm_neon_vld4dup: {
20999 Info.opc = ISD::INTRINSIC_W_CHAIN;
21000 // Conservatively set memVT to the entire set of vectors loaded.
21001 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21002 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21003 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21004 Info.ptrVal = I.getArgOperand(0);
21005 Info.offset = 0;
21006 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21007 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21008 // volatile loads with NEON intrinsics not supported
21009 Info.flags = MachineMemOperand::MOLoad;
21010 return true;
21011 }
21012 case Intrinsic::arm_neon_vld1x2:
21013 case Intrinsic::arm_neon_vld1x3:
21014 case Intrinsic::arm_neon_vld1x4: {
21016 // Conservatively set memVT to the entire set of vectors loaded.
21017 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21018 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21019 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21020 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21021 Info.offset = 0;
21022 Info.align.reset();
21023 // volatile loads with NEON intrinsics not supported
21025 return true;
21026 }
21027 case Intrinsic::arm_neon_vst1:
21028 case Intrinsic::arm_neon_vst2:
21029 case Intrinsic::arm_neon_vst3:
21030 case Intrinsic::arm_neon_vst4:
21031 case Intrinsic::arm_neon_vst2lane:
21032 case Intrinsic::arm_neon_vst3lane:
21033 case Intrinsic::arm_neon_vst4lane: {
21034 Info.opc = ISD::INTRINSIC_VOID;
21035 // Conservatively set memVT to the entire set of vectors stored.
21036 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21037 unsigned NumElts = 0;
21038 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21039 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21040 if (!ArgTy->isVectorTy())
21041 break;
21042 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21043 }
21044 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21045 Info.ptrVal = I.getArgOperand(0);
21046 Info.offset = 0;
21047 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21048 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21049 // volatile stores with NEON intrinsics not supported
21050 Info.flags = MachineMemOperand::MOStore;
21051 return true;
21052 }
21053 case Intrinsic::arm_neon_vst1x2:
21054 case Intrinsic::arm_neon_vst1x3:
21055 case Intrinsic::arm_neon_vst1x4: {
21057 // Conservatively set memVT to the entire set of vectors stored.
21058 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21059 unsigned NumElts = 0;
21060 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21061 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21062 if (!ArgTy->isVectorTy())
21063 break;
21064 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21065 }
21066 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21067 Info.ptrVal = I.getArgOperand(0);
21068 Info.offset = 0;
21069 Info.align.reset();
21070 // volatile stores with NEON intrinsics not supported
21072 return true;
21073 }
21074 case Intrinsic::arm_mve_vld2q:
21075 case Intrinsic::arm_mve_vld4q: {
21077 // Conservatively set memVT to the entire set of vectors loaded.
21078 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21079 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21080 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21081 Info.ptrVal = I.getArgOperand(0);
21082 Info.offset = 0;
21083 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21084 // volatile loads with MVE intrinsics not supported
21086 return true;
21087 }
21088 case Intrinsic::arm_mve_vst2q:
21089 case Intrinsic::arm_mve_vst4q: {
21091 // Conservatively set memVT to the entire set of vectors stored.
21092 Type *VecTy = I.getArgOperand(1)->getType();
21093 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21094 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21095 Info.ptrVal = I.getArgOperand(0);
21096 Info.offset = 0;
21097 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21098 // volatile stores with MVE intrinsics not supported
21100 return true;
21101 }
21102 case Intrinsic::arm_mve_vldr_gather_base:
21103 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21105 Info.ptrVal = nullptr;
21106 Info.memVT = MVT::getVT(I.getType());
21107 Info.align = Align(1);
21109 return true;
21110 }
21111 case Intrinsic::arm_mve_vldr_gather_base_wb:
21112 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21114 Info.ptrVal = nullptr;
21115 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21116 Info.align = Align(1);
21118 return true;
21119 }
21120 case Intrinsic::arm_mve_vldr_gather_offset:
21121 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21123 Info.ptrVal = nullptr;
21124 MVT DataVT = MVT::getVT(I.getType());
21125 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21126 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21127 DataVT.getVectorNumElements());
21128 Info.align = Align(1);
21130 return true;
21131 }
21132 case Intrinsic::arm_mve_vstr_scatter_base:
21133 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21135 Info.ptrVal = nullptr;
21136 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21137 Info.align = Align(1);
21139 return true;
21140 }
21141 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21142 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21144 Info.ptrVal = nullptr;
21145 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21146 Info.align = Align(1);
21148 return true;
21149 }
21150 case Intrinsic::arm_mve_vstr_scatter_offset:
21151 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21153 Info.ptrVal = nullptr;
21154 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21155 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21156 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21157 DataVT.getVectorNumElements());
21158 Info.align = Align(1);
21160 return true;
21161 }
21162 case Intrinsic::arm_ldaex:
21163 case Intrinsic::arm_ldrex: {
21164 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21165 Type *ValTy = I.getParamElementType(0);
21166 Info.opc = ISD::INTRINSIC_W_CHAIN;
21167 Info.memVT = MVT::getVT(ValTy);
21168 Info.ptrVal = I.getArgOperand(0);
21169 Info.offset = 0;
21170 Info.align = DL.getABITypeAlign(ValTy);
21171 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21172 return true;
21173 }
21174 case Intrinsic::arm_stlex:
21175 case Intrinsic::arm_strex: {
21176 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21177 Type *ValTy = I.getParamElementType(1);
21178 Info.opc = ISD::INTRINSIC_W_CHAIN;
21179 Info.memVT = MVT::getVT(ValTy);
21180 Info.ptrVal = I.getArgOperand(1);
21181 Info.offset = 0;
21182 Info.align = DL.getABITypeAlign(ValTy);
21183 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21184 return true;
21185 }
21186 case Intrinsic::arm_stlexd:
21187 case Intrinsic::arm_strexd:
21189 Info.memVT = MVT::i64;
21190 Info.ptrVal = I.getArgOperand(2);
21191 Info.offset = 0;
21192 Info.align = Align(8);
21194 return true;
21195
21196 case Intrinsic::arm_ldaexd:
21197 case Intrinsic::arm_ldrexd:
21199 Info.memVT = MVT::i64;
21200 Info.ptrVal = I.getArgOperand(0);
21201 Info.offset = 0;
21202 Info.align = Align(8);
21204 return true;
21205
21206 default:
21207 break;
21208 }
21209
21210 return false;
21211}
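A small worked example of the conservative memVT sizing used by the vldN/vstN cases above (plain arithmetic, not LLVM types): every vector touched by the intrinsic is folded into a single <N x i64> memory type.

    #include <cstdint>

    static uint64_t memVTNumI64Lanes(uint64_t TotalVectorBits) {
      return TotalVectorBits / 64;
    }
    // e.g. a vld3 of three <4 x i32> vectors covers 3 * 128 = 384 bits,
    // which is modelled as <6 x i64>.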
21212
21213/// Returns true if it is beneficial to convert a load of a constant
21214/// to just the constant itself.
21215bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21216 Type *Ty) const {
21217 assert(Ty->isIntegerTy());
21218
21219 unsigned Bits = Ty->getPrimitiveSizeInBits();
21220 if (Bits == 0 || Bits > 32)
21221 return false;
21222 return true;
21223}
21224
21225bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21226 unsigned Index) const {
21227 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21228 return false;
21229
21230 return (Index == 0 || Index == ResVT.getVectorNumElements());
21231}
21232
21233Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21234 ARM_MB::MemBOpt Domain) const {
21235 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21236
21237 // First, if the target has no DMB, see what fallback we can use.
21238 if (!Subtarget->hasDataBarrier()) {
21239 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21240 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21241 // here.
21242 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21243 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21244 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21245 Builder.getInt32(0), Builder.getInt32(7),
21246 Builder.getInt32(10), Builder.getInt32(5)};
21247 return Builder.CreateCall(MCR, args);
21248 } else {
21249 // Instead of using barriers, atomic accesses on these subtargets use
21250 // libcalls.
21251 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21252 }
21253 } else {
21254 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21255 // Only a full system barrier exists in the M-class architectures.
21256 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21257 Constant *CDomain = Builder.getInt32(Domain);
21258 return Builder.CreateCall(DMB, CDomain);
21259 }
21260}
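The six constants passed to the mcr intrinsic above encode the legacy ARMv6 barrier operation CP15 c7, c10, 5 (Data Memory Barrier). A rough user-level equivalent, shown purely as a sketch and guarded so it only compiles when targeting ARM:

    static inline void armv6DataMemoryBarrier() {
    #if defined(__arm__)
      __asm__ volatile("mcr p15, 0, %0, c7, c10, 5" : : "r"(0) : "memory");
    #endif
    }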
21261
21262// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21263Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21264 Instruction *Inst,
21265 AtomicOrdering Ord) const {
21266 switch (Ord) {
21267 case AtomicOrdering::NotAtomic:
21268 case AtomicOrdering::Unordered:
21269 llvm_unreachable("Invalid fence: unordered/non-atomic");
21270 case AtomicOrdering::Monotonic:
21271 case AtomicOrdering::Acquire:
21272 return nullptr; // Nothing to do
21273 case AtomicOrdering::SequentiallyConsistent:
21274 if (!Inst->hasAtomicStore())
21275 return nullptr; // Nothing to do
21276 [[fallthrough]];
21277 case AtomicOrdering::Release:
21278 case AtomicOrdering::AcquireRelease:
21279 if (Subtarget->preferISHSTBarriers())
21280 return makeDMB(Builder, ARM_MB::ISHST);
21281 // FIXME: add a comment with a link to documentation justifying this.
21282 else
21283 return makeDMB(Builder, ARM_MB::ISH);
21284 }
21285 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21286}
21287
21288Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21289 Instruction *Inst,
21290 AtomicOrdering Ord) const {
21291 switch (Ord) {
21292 case AtomicOrdering::NotAtomic:
21293 case AtomicOrdering::Unordered:
21294 llvm_unreachable("Invalid fence: unordered/not-atomic");
21295 case AtomicOrdering::Monotonic:
21296 case AtomicOrdering::Release:
21297 return nullptr; // Nothing to do
21298 case AtomicOrdering::Acquire:
21299 case AtomicOrdering::AcquireRelease:
21300 case AtomicOrdering::SequentiallyConsistent:
21301 return makeDMB(Builder, ARM_MB::ISH);
21302 }
21303 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21304}
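Taken together, the two hooks above place DMBs around fence-lowered atomic accesses as follows; a compact standalone restatement (illustrative helpers, not LLVM API):

    enum class Ord { Monotonic, Acquire, Release, AcqRel, SeqCst };

    static bool needsLeadingDMB(Ord O, bool AccessStores) {
      return O == Ord::Release || O == Ord::AcqRel ||
             (O == Ord::SeqCst && AccessStores);
    }
    static bool needsTrailingDMB(Ord O) {
      return O == Ord::Acquire || O == Ord::AcqRel || O == Ord::SeqCst;
    }
    // e.g. a seq_cst store is bracketed by DMBs on both sides, while an
    // acquire load only gets the trailing one.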
21305
21306// Loads and stores less than 64-bits are already atomic; ones above that
21307// are doomed anyway, so defer to the default libcall and blame the OS when
21308// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21309// anything for those.
21310TargetLoweringBase::AtomicExpansionKind
21311ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21312 bool has64BitAtomicStore;
21313 if (Subtarget->isMClass())
21314 has64BitAtomicStore = false;
21315 else if (Subtarget->isThumb())
21316 has64BitAtomicStore = Subtarget->hasV7Ops();
21317 else
21318 has64BitAtomicStore = Subtarget->hasV6Ops();
21319
21320 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21321 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21322 : AtomicExpansionKind::None;
21323}
21324
21325// Loads and stores less than 64-bits are already atomic; ones above that
21326// are doomed anyway, so defer to the default libcall and blame the OS when
21327// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21328// anything for those.
21329// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21330// guarantee, see DDI0406C ARM architecture reference manual,
21331// sections A8.8.72-74 LDRD)
21332TargetLowering::AtomicExpansionKind
21333ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21334 bool has64BitAtomicLoad;
21335 if (Subtarget->isMClass())
21336 has64BitAtomicLoad = false;
21337 else if (Subtarget->isThumb())
21338 has64BitAtomicLoad = Subtarget->hasV7Ops();
21339 else
21340 has64BitAtomicLoad = Subtarget->hasV6Ops();
21341
21342 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21343 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21344 : AtomicExpansionKind::None;
21345}
21346
21347// For the real atomic operations, we have ldrex/strex up to 32 bits,
21348// and up to 64 bits on the non-M profiles
21349TargetLowering::AtomicExpansionKind
21350ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21351 if (AI->isFloatingPointOperation())
21352 return AtomicExpansionKind::CmpXChg;
21353
21354 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21355 bool hasAtomicRMW;
21356 if (Subtarget->isMClass())
21357 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21358 else if (Subtarget->isThumb())
21359 hasAtomicRMW = Subtarget->hasV7Ops();
21360 else
21361 hasAtomicRMW = Subtarget->hasV6Ops();
21362 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21363 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21364 // implement atomicrmw without spilling. If the target address is also on
21365 // the stack and close enough to the spill slot, this can lead to a
21366 // situation where the monitor always gets cleared and the atomic operation
21367 // can never succeed. So at -O0 lower this operation to a CAS loop.
21368 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21369 return AtomicExpansionKind::CmpXChg;
21370 return AtomicExpansionKind::LLSC;
21371 }
21372 return AtomicExpansionKind::None;
21373}
21374
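The LL/SC expansion selected above turns an atomicrmw into a retry loop around exclusive loads and stores. The shape of that loop, sketched with hypothetical ldrex/strex wrappers standing in for the @llvm.arm.ldrex / @llvm.arm.strex intrinsics the real expansion emits:

    #include <cstdint>

    uint32_t ldrex(volatile uint32_t *Ptr);                // hypothetical wrappers,
    uint32_t strex(volatile uint32_t *Ptr, uint32_t Val);  // strex returns 0 on success

    static uint32_t atomicAddLLSC(volatile uint32_t *Ptr, uint32_t Inc) {
      uint32_t Old;
      do {
        Old = ldrex(Ptr);                       // load-exclusive
      } while (strex(Ptr, Old + Inc) != 0);     // retry if the monitor was lost
      return Old;                               // atomicrmw yields the old value
    }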
21375// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21376// bits, and up to 64 bits on the non-M profiles.
21377TargetLowering::AtomicExpansionKind
21378ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21379 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21380 // implement cmpxchg without spilling. If the address being exchanged is also
21381 // on the stack and close enough to the spill slot, this can lead to a
21382 // situation where the monitor always gets cleared and the atomic operation
21383 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21384 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21385 bool HasAtomicCmpXchg;
21386 if (Subtarget->isMClass())
21387 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21388 else if (Subtarget->isThumb())
21389 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21390 else
21391 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21392 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21393 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21394 return AtomicExpansionKind::LLSC;
21395 return AtomicExpansionKind::None;
21396}
21397
21398bool ARMTargetLowering::shouldInsertFencesForAtomic(
21399 const Instruction *I) const {
21400 return InsertFencesForAtomic;
21401}
21402
21403bool ARMTargetLowering::useLoadStackGuardNode() const {
21404 // ROPI/RWPI are not supported currently.
21405 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21406}
21407
21408void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21409 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21410 return TargetLowering::insertSSPDeclarations(M);
21411
21412 // MSVC CRT has a global variable holding security cookie.
21413 M.getOrInsertGlobal("__security_cookie",
21414 PointerType::getUnqual(M.getContext()));
21415
21416 // MSVC CRT has a function to validate security cookie.
21417 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21418 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21419 PointerType::getUnqual(M.getContext()));
21420 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21421 F->addParamAttr(0, Attribute::AttrKind::InReg);
21422}
21423
21424Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21425 // MSVC CRT has a global variable holding security cookie.
21426 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21427 return M.getGlobalVariable("__security_cookie");
21428 return TargetLowering::getSDagStackGuard(M);
21429}
21430
21431Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21432 // MSVC CRT has a function to validate security cookie.
21433 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21434 return M.getFunction("__security_check_cookie");
21435 return TargetLowering::getSSPStackGuardCheck(M);
21436}
21437
21438bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21439 unsigned &Cost) const {
21440 // If we do not have NEON, vector types are not natively supported.
21441 if (!Subtarget->hasNEON())
21442 return false;
21443
21444 // Floating point values and vector values map to the same register file.
21445 // Therefore, although we could do a store extract of a vector type, this is
21446 // better to leave at float as we have more freedom in the addressing mode for
21447 // those.
21448 if (VectorTy->isFPOrFPVectorTy())
21449 return false;
21450
21451 // If the index is unknown at compile time, this is very expensive to lower
21452 // and it is not possible to combine the store with the extract.
21453 if (!isa<ConstantInt>(Idx))
21454 return false;
21455
21456 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21457 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21458 // We can do a store + vector extract on any vector that fits perfectly in a D
21459 // or Q register.
21460 if (BitWidth == 64 || BitWidth == 128) {
21461 Cost = 0;
21462 return true;
21463 }
21464 return false;
21465}
21466
21467bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21468 return Subtarget->hasV6T2Ops();
21469}
21470
21471bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21472 return Subtarget->hasV6T2Ops();
21473}
21474
21475bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21476 const Instruction &AndI) const {
21477 if (!Subtarget->hasV7Ops())
21478 return false;
21479
21480 // Sink the `and` instruction only if the mask would fit into a modified
21481 // immediate operand.
21482 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21483 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21484 return false;
21485 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21486 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21487 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21488}
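The ARM-mode getSOImmVal test referenced above accepts exactly the "modified immediates": an 8-bit value rotated right by an even amount (the Thumb-2 variant additionally accepts some splat patterns not covered here). A standalone sketch of the rotation check:

    #include <cstdint>

    static bool isARMModifiedImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // rotl(V, Rot) undoes a rotate-right by Rot
        uint32_t Rotl = Rot ? (V << Rot) | (V >> (32 - Rot)) : V;
        if (Rotl <= 0xFF)
          return true;
      }
      return false;
    }
    // e.g. 0xFF000000 and 0x000003FC are encodable; 0x00000101 is not.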
21489
21490TargetLowering::ShiftLegalizationStrategy
21491ARMTargetLowering::preferredShiftLegalizationStrategy(
21492 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21493 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21494 return ShiftLegalizationStrategy::LowerToLibcall;
21495 return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21496 ExpansionFactor);
21497}
21498
21499Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21500 Value *Addr,
21501 AtomicOrdering Ord) const {
21502 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21503 bool IsAcquire = isAcquireOrStronger(Ord);
21504
21505 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21506 // intrinsic must return {i32, i32} and we have to recombine them into a
21507 // single i64 here.
21508 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21509 Intrinsic::ID Int =
21510 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21511 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21512
21513 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21514
21515 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21516 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21517 if (!Subtarget->isLittle())
21518 std::swap (Lo, Hi);
21519 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21520 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21521 return Builder.CreateOr(
21522 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21523 }
21524
21525 Type *Tys[] = { Addr->getType() };
21526 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21527 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21528 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21529
21530 CI->addParamAttr(
21531 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21532 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21533}
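The i64 path above recombines the {lo, hi} pair returned by LDREXD. The arithmetic, as a standalone sketch:

    #include <cstdint>
    #include <utility>

    static uint64_t combineExclusiveHalves(uint32_t Lo, uint32_t Hi,
                                           bool BigEndian) {
      if (BigEndian)
        std::swap(Lo, Hi);            // mirrors the isLittle() check above
      return uint64_t(Lo) | (uint64_t(Hi) << 32);
    }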
21534
21535void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21536 IRBuilderBase &Builder) const {
21537 if (!Subtarget->hasV7Ops())
21538 return;
21539 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21540 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21541}
21542
21543Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21544 Value *Val, Value *Addr,
21545 AtomicOrdering Ord) const {
21546 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21547 bool IsRelease = isReleaseOrStronger(Ord);
21548
21549 // Since the intrinsics must have legal type, the i64 intrinsics take two
21550 // parameters: "i32, i32". We must marshal Val into the appropriate form
21551 // before the call.
21552 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21553 Intrinsic::ID Int =
21554 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21555 Function *Strex = Intrinsic::getDeclaration(M, Int);
21556 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21557
21558 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21559 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21560 if (!Subtarget->isLittle())
21561 std::swap(Lo, Hi);
21562 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21563 }
21564
21565 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21566 Type *Tys[] = { Addr->getType() };
21567 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21568
21569 CallInst *CI = Builder.CreateCall(
21570 Strex, {Builder.CreateZExtOrBitCast(
21571 Val, Strex->getFunctionType()->getParamType(0)),
21572 Addr});
21573 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21574 Val->getType()));
21575 return CI;
21576}
21577
21578
21579bool ARMTargetLowering::alignLoopsWithOptSize() const {
21580 return Subtarget->isMClass();
21581}
21582
21583/// A helper function for determining the number of interleaved accesses we
21584/// will generate when lowering accesses of the given type.
21585unsigned
21586ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21587 const DataLayout &DL) const {
21588 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21589}
21590
21591bool ARMTargetLowering::isLegalInterleavedAccessType(
21592 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21593 const DataLayout &DL) const {
21594
21595 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21596 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21597
21598 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21599 return false;
21600
21601 // Ensure the vector doesn't have f16 elements. Even though we could do an
21602 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21603 // f32.
21604 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21605 return false;
21606 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21607 return false;
21608
21609 // Ensure the number of vector elements is greater than 1.
21610 if (VecTy->getNumElements() < 2)
21611 return false;
21612
21613 // Ensure the element type is legal.
21614 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21615 return false;
21616 // And the alignment if high enough under MVE.
21617 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21618 return false;
21619
21620 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21621 // 128 will be split into multiple interleaved accesses.
21622 if (Subtarget->hasNEON() && VecSize == 64)
21623 return true;
21624 return VecSize % 128 == 0;
21625}
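Combining the two helpers above, the width rules alone work out as follows (a sketch that ignores the element-type, factor and alignment checks):

    // Number of vldN/vstN accesses needed for a vector group of the given
    // width, or 0 if the width cannot be handled as an interleaved group.
    static unsigned interleavedAccessCount(unsigned VecBits, bool HasNEON) {
      if (HasNEON && VecBits == 64)
        return 1;                       // a single D-register access
      if (VecBits % 128 != 0)
        return 0;                       // not legal; would need splitting/padding
      return (VecBits + 127) / 128;     // split into Q-register sized pieces
    }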
21626
21627unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21628 if (Subtarget->hasNEON())
21629 return 4;
21630 if (Subtarget->hasMVEIntegerOps())
21631 return MVEMaxSupportedInterleaveFactor;
21632 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21633}
21634
21635/// Lower an interleaved load into a vldN intrinsic.
21636///
21637/// E.g. Lower an interleaved load (Factor = 2):
21638/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21639/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21640/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21641///
21642/// Into:
21643/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21644/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21645/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21646bool ARMTargetLowering::lowerInterleavedLoad(
21647 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21648 ArrayRef<unsigned> Indices, unsigned Factor) const {
21649 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21650 "Invalid interleave factor");
21651 assert(!Shuffles.empty() && "Empty shufflevector input");
21652 assert(Shuffles.size() == Indices.size() &&
21653 "Unmatched number of shufflevectors and indices");
21654
21655 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21656 Type *EltTy = VecTy->getElementType();
21657
21658 const DataLayout &DL = LI->getModule()->getDataLayout();
21659 Align Alignment = LI->getAlign();
21660
21661 // Skip if we do not have NEON and skip illegal vector types. We can
21662 // "legalize" wide vector types into multiple interleaved accesses as long as
21663 // the vector types are divisible by 128.
21664 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21665 return false;
21666
21667 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21668
21669 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21670 // load integer vectors first and then convert to pointer vectors.
21671 if (EltTy->isPointerTy())
21672 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21673
21674 IRBuilder<> Builder(LI);
21675
21676 // The base address of the load.
21677 Value *BaseAddr = LI->getPointerOperand();
21678
21679 if (NumLoads > 1) {
21680 // If we're going to generate more than one load, reset the sub-vector type
21681 // to something legal.
21682 VecTy = FixedVectorType::get(VecTy->getElementType(),
21683 VecTy->getNumElements() / NumLoads);
21684 }
21685
21686 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21687
21688 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21689 if (Subtarget->hasNEON()) {
21690 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21691 Type *Tys[] = {VecTy, PtrTy};
21692 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21693 Intrinsic::arm_neon_vld3,
21694 Intrinsic::arm_neon_vld4};
21695 Function *VldnFunc =
21696 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21697
21698 SmallVector<Value *, 2> Ops;
21699 Ops.push_back(BaseAddr);
21700 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21701
21702 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21703 } else {
21704 assert((Factor == 2 || Factor == 4) &&
21705 "expected interleave factor of 2 or 4 for MVE");
21706 Intrinsic::ID LoadInts =
21707 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21708 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21709 Type *Tys[] = {VecTy, PtrTy};
21710 Function *VldnFunc =
21711 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21712
21713 SmallVector<Value *, 2> Ops;
21714 Ops.push_back(BaseAddr);
21715 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21716 }
21717 };
21718
21719 // Holds sub-vectors extracted from the load intrinsic return values. The
21720 // sub-vectors are associated with the shufflevector instructions they will
21721 // replace.
21722 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21723
21724 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21725 // If we're generating more than one load, compute the base address of
21726 // subsequent loads as an offset from the previous.
21727 if (LoadCount > 0)
21728 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21729 VecTy->getNumElements() * Factor);
21730
21731 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21732
21733 // Replace uses of each shufflevector with the corresponding vector loaded
21734 // by ldN.
21735 for (unsigned i = 0; i < Shuffles.size(); i++) {
21736 ShuffleVectorInst *SV = Shuffles[i];
21737 unsigned Index = Indices[i];
21738
21739 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21740
21741 // Convert the integer vector to pointer vector if the element is pointer.
21742 if (EltTy->isPointerTy())
21743 SubVec = Builder.CreateIntToPtr(
21744 SubVec,
21745 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21746
21747 SubVecs[SV].push_back(SubVec);
21748 }
21749 }
21750
21751 // Replace uses of the shufflevector instructions with the sub-vectors
21752 // returned by the load intrinsic. If a shufflevector instruction is
21753 // associated with more than one sub-vector, those sub-vectors will be
21754 // concatenated into a single wide vector.
21755 for (ShuffleVectorInst *SVI : Shuffles) {
21756 auto &SubVec = SubVecs[SVI];
21757 auto *WideVec =
21758 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21759 SVI->replaceAllUsesWith(WideVec);
21760 }
21761
21762 return true;
21763}
21764
21765/// Lower an interleaved store into a vstN intrinsic.
21766///
21767/// E.g. Lower an interleaved store (Factor = 3):
21768/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21769/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21770/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21771///
21772/// Into:
21773/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21774/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21775/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21776/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21777///
21778/// Note that the new shufflevectors will be removed and we'll only generate one
21779/// vst3 instruction in CodeGen.
21780///
21781/// Example for a more general valid mask (Factor 3). Lower:
21782/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21783/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21784/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21785///
21786/// Into:
21787/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21788/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21789/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21790/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21791bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21792 ShuffleVectorInst *SVI,
21793 unsigned Factor) const {
21794 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21795 "Invalid interleave factor");
21796
21797 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21798 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21799
21800 unsigned LaneLen = VecTy->getNumElements() / Factor;
21801 Type *EltTy = VecTy->getElementType();
21802 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21803
21804 const DataLayout &DL = SI->getModule()->getDataLayout();
21805 Align Alignment = SI->getAlign();
21806
21807 // Skip if we do not have NEON and skip illegal vector types. We can
21808 // "legalize" wide vector types into multiple interleaved accesses as long as
21809 // the vector types are divisible by 128.
21810 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21811 return false;
21812
21813 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21814
21815 Value *Op0 = SVI->getOperand(0);
21816 Value *Op1 = SVI->getOperand(1);
21817 IRBuilder<> Builder(SI);
21818
21819 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21820 // vectors to integer vectors.
21821 if (EltTy->isPointerTy()) {
21822 Type *IntTy = DL.getIntPtrType(EltTy);
21823
21824 // Convert to the corresponding integer vector.
21825 auto *IntVecTy =
21826 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21827 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21828 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21829
21830 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21831 }
21832
21833 // The base address of the store.
21834 Value *BaseAddr = SI->getPointerOperand();
21835
21836 if (NumStores > 1) {
21837 // If we're going to generate more than one store, reset the lane length
21838 // and sub-vector type to something legal.
21839 LaneLen /= NumStores;
21840 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21841 }
21842
21843 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21844
21845 auto Mask = SVI->getShuffleMask();
21846
21847 auto createStoreIntrinsic = [&](Value *BaseAddr,
21848 SmallVectorImpl<Value *> &Shuffles) {
21849 if (Subtarget->hasNEON()) {
21850 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21851 Intrinsic::arm_neon_vst3,
21852 Intrinsic::arm_neon_vst4};
21853 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21854 Type *Tys[] = {PtrTy, SubVecTy};
21855
21856 Function *VstNFunc = Intrinsic::getDeclaration(
21857 SI->getModule(), StoreInts[Factor - 2], Tys);
21858
21859 SmallVector<Value *, 6> Ops;
21860 Ops.push_back(BaseAddr);
21861 append_range(Ops, Shuffles);
21862 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21863 Builder.CreateCall(VstNFunc, Ops);
21864 } else {
21865 assert((Factor == 2 || Factor == 4) &&
21866 "expected interleave factor of 2 or 4 for MVE");
21867 Intrinsic::ID StoreInts =
21868 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21869 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21870 Type *Tys[] = {PtrTy, SubVecTy};
21871 Function *VstNFunc =
21872 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21873
21874 SmallVector<Value *, 6> Ops;
21875 Ops.push_back(BaseAddr);
21876 append_range(Ops, Shuffles);
21877 for (unsigned F = 0; F < Factor; F++) {
21878 Ops.push_back(Builder.getInt32(F));
21879 Builder.CreateCall(VstNFunc, Ops);
21880 Ops.pop_back();
21881 }
21882 }
21883 };
21884
21885 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21886 // If we're generating more than one store, compute the base address of
21887 // subsequent stores as an offset from the previous.
21888 if (StoreCount > 0)
21889 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21890 BaseAddr, LaneLen * Factor);
21891
21892 SmallVector<Value *, 4> Shuffles;
21893
21894 // Split the shufflevector operands into sub vectors for the new vstN call.
21895 for (unsigned i = 0; i < Factor; i++) {
21896 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21897 if (Mask[IdxI] >= 0) {
21898 Shuffles.push_back(Builder.CreateShuffleVector(
21899 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21900 } else {
21901 unsigned StartMask = 0;
21902 for (unsigned j = 1; j < LaneLen; j++) {
21903 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21904 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21905 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21906 break;
21907 }
21908 }
21909 // Note: If all elements in a chunk are undefs, StartMask=0!
21910 // Note: Filling undef gaps with random elements is ok, since
21911 // those elements were being written anyway (with undefs).
21912 // In the case of all undefs we're defaulting to using elems from 0
21913 // Note: StartMask cannot be negative, it's checked in
21914 // isReInterleaveMask
21915 Shuffles.push_back(Builder.CreateShuffleVector(
21916 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21917 }
21918 }
21919
21920 createStoreIntrinsic(BaseAddr, Shuffles);
21921 }
21922 return true;
21923}
21924
21925enum HABaseType {
21926 HA_UNKNOWN = 0,
21927 HA_FLOAT,
21928 HA_DOUBLE,
21929 HA_VECT64,
21930 HA_VECT128
21931};
21932
21933static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21934 uint64_t &Members) {
21935 if (auto *ST = dyn_cast<StructType>(Ty)) {
21936 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21937 uint64_t SubMembers = 0;
21938 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21939 return false;
21940 Members += SubMembers;
21941 }
21942 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21943 uint64_t SubMembers = 0;
21944 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21945 return false;
21946 Members += SubMembers * AT->getNumElements();
21947 } else if (Ty->isFloatTy()) {
21948 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21949 return false;
21950 Members = 1;
21951 Base = HA_FLOAT;
21952 } else if (Ty->isDoubleTy()) {
21953 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21954 return false;
21955 Members = 1;
21956 Base = HA_DOUBLE;
21957 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21958 Members = 1;
21959 switch (Base) {
21960 case HA_FLOAT:
21961 case HA_DOUBLE:
21962 return false;
21963 case HA_VECT64:
21964 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21965 case HA_VECT128:
21966 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21967 case HA_UNKNOWN:
21968 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21969 case 64:
21970 Base = HA_VECT64;
21971 return true;
21972 case 128:
21973 Base = HA_VECT128;
21974 return true;
21975 default:
21976 return false;
21977 }
21978 }
21979 }
21980
21981 return (Members > 0 && Members <= 4);
21982}
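Concretely, the predicate above classifies C aggregates such as the following (comments state the expected result under AAPCS-VFP):

    struct HFA2  { float x, y; };          // HA_FLOAT,  Members == 2 -> true
    struct HDA3  { double d[3]; };         // HA_DOUBLE, Members == 3 -> true
    struct Mixed { float f; double d; };   // mixed base types        -> false
    struct Big   { float f[5]; };          // five members (> 4)      -> false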
21983
21984/// Return the correct alignment for the current calling convention.
21985Align ARMTargetLowering::getABIAlignmentForCallingConv(
21986 Type *ArgTy, const DataLayout &DL) const {
21987 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21988 if (!ArgTy->isVectorTy())
21989 return ABITypeAlign;
21990
21991 // Avoid over-aligning vector parameters. It would require realigning the
21992 // stack and waste space for no real benefit.
21993 return std::min(ABITypeAlign, DL.getStackAlignment());
21994}
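For example, with the usual 8-byte ARM stack alignment a 16-byte aligned vector argument is passed with Align(8) rather than forcing stack realignment. A one-line restatement of the rule (sketch, not LLVM API):

    #include <algorithm>

    static unsigned argABIAlign(unsigned TypeAlign, bool IsVector,
                                unsigned StackAlign = 8) {
      return IsVector ? std::min(TypeAlign, StackAlign) : TypeAlign;
    }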
21995
21996/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21997/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21998/// passing according to AAPCS rules.
21999bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
22000 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
22001 const DataLayout &DL) const {
22002 if (getEffectiveCallingConv(CallConv, isVarArg) !=
22003 CallingConv::ARM_AAPCS_VFP)
22004 return false;
22005
22006 HABaseType Base = HA_UNKNOWN;
22007 uint64_t Members = 0;
22008 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
22009 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22010
22011 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
22012 return IsHA || IsIntArray;
22013}
22014
22015Register ARMTargetLowering::getExceptionPointerRegister(
22016 const Constant *PersonalityFn) const {
22017 // Platforms which do not use SjLj EH may return values in these registers
22018 // via the personality function.
22019 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22020}
22021
22022Register ARMTargetLowering::getExceptionSelectorRegister(
22023 const Constant *PersonalityFn) const {
22024 // Platforms which do not use SjLj EH may return values in these registers
22025 // via the personality function.
22026 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
22027}
22028
22029void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
22030 // Update IsSplitCSR in ARMFunctionInfo.
22031 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22032 AFI->setIsSplitCSR(true);
22033}
22034
22035void ARMTargetLowering::insertCopiesSplitCSR(
22036 MachineBasicBlock *Entry,
22037 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
22038 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22039 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22040 if (!IStart)
22041 return;
22042
22043 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22044 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22045 MachineBasicBlock::iterator MBBI = Entry->begin();
22046 for (const MCPhysReg *I = IStart; *I; ++I) {
22047 const TargetRegisterClass *RC = nullptr;
22048 if (ARM::GPRRegClass.contains(*I))
22049 RC = &ARM::GPRRegClass;
22050 else if (ARM::DPRRegClass.contains(*I))
22051 RC = &ARM::DPRRegClass;
22052 else
22053 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22054
22055 Register NewVR = MRI->createVirtualRegister(RC);
22056 // Create copy from CSR to a virtual register.
22057 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22058 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22059 // nounwind. If we want to generalize this later, we may need to emit
22060 // CFI pseudo-instructions.
22061 assert(Entry->getParent()->getFunction().hasFnAttribute(
22062 Attribute::NoUnwind) &&
22063 "Function should be nounwind in insertCopiesSplitCSR!");
22064 Entry->addLiveIn(*I);
22065 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22066 .addReg(*I);
22067
22068 // Insert the copy-back instructions right before the terminator.
22069 for (auto *Exit : Exits)
22070 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22071 TII->get(TargetOpcode::COPY), *I)
22072 .addReg(NewVR);
22073 }
22074}
22075
22076void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
22077 MF.getFrameInfo().computeMaxCallFrameSize(MF);
22078 TargetLoweringBase::finalizeLowering(MF);
22079}
22080
22081bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22082 return Subtarget->hasMVEIntegerOps();
22083}
22084
22085bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22086 ComplexDeinterleavingOperation Operation, Type *Ty) const {
22087 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22088 if (!VTy)
22089 return false;
22090
22091 auto *ScalarTy = VTy->getScalarType();
22092 unsigned NumElements = VTy->getNumElements();
22093
22094 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22095 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22096 return false;
22097
22098 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22099 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22100 return Subtarget->hasMVEFloatOps();
22101
22102 if (Operation != ComplexDeinterleavingOperation::CAdd)
22103 return false;
22104
22105 return Subtarget->hasMVEIntegerOps() &&
22106 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22107 ScalarTy->isIntegerTy(32));
22108}
22109
22110Value *ARMTargetLowering::createComplexDeinterleavingIR(
22111 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22112 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22113 Value *Accumulator) const {
22114
22115 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22116
22117 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22118
22119 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22120
22121 if (TyWidth > 128) {
22122 int Stride = Ty->getNumElements() / 2;
22123 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22124 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22125 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22126 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22127
22128 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22129 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22130 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22131 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22132 Value *LowerSplitAcc = nullptr;
22133 Value *UpperSplitAcc = nullptr;
22134
22135 if (Accumulator) {
22136 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22137 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22138 }
22139
22140 auto *LowerSplitInt = createComplexDeinterleavingIR(
22141 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22142 auto *UpperSplitInt = createComplexDeinterleavingIR(
22143 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22144
22145 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22146 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22147 }
22148
22149 auto *IntTy = Type::getInt32Ty(B.getContext());
22150
22151 ConstantInt *ConstRotation = nullptr;
22152 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22153 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22154
22155 if (Accumulator)
22156 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22157 {ConstRotation, Accumulator, InputB, InputA});
22158 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22159 {ConstRotation, InputB, InputA});
22160 }
22161
22162 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22163 // 1 means the value is not halved.
22164 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22165
22166 if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22167 ConstRotation = ConstantInt::get(IntTy, 0);
22168 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22169 ConstRotation = ConstantInt::get(IntTy, 1);
22170
22171 if (!ConstRotation)
22172 return nullptr; // Invalid rotation for arm_mve_vcaddq
22173
22174 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22175 {ConstHalving, ConstRotation, InputA, InputB});
22176 }
22177
22178 return nullptr;
22179}
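The >128-bit path above splits each operand in half with two shuffle masks built from a plain index sequence. The mask construction, as a standalone sketch:

    #include <numeric>
    #include <vector>

    static void buildSplitMasks(unsigned NumElts, std::vector<int> &Lower,
                                std::vector<int> &Upper) {
      std::vector<int> Seq(NumElts);
      std::iota(Seq.begin(), Seq.end(), 0);               // 0, 1, ..., NumElts-1
      Lower.assign(Seq.begin(), Seq.begin() + NumElts / 2);
      Upper.assign(Seq.begin() + NumElts / 2, Seq.end());
    }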
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
#define MAKE_CASE(V)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
static const LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multiplier accumulator forwarding.
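A minimal sketch of the rewrite this combine performs (illustrative only, not the in-tree implementation; Add is assumed to be the (A + B) operand and C the other multiplicand):

  // Rewrite (A + B) * C into (A * C) + (B * C) so each multiply can feed a
  // multiply-accumulate (vmul + vmla) rather than a vadd followed by a vmul.
  static SDValue distributeMulOverAdd(SDValue Add, SDValue C, const SDLoc &dl,
                                      SelectionDAG &DAG) {
    EVT VT = Add.getValueType();
    SDValue AC = DAG.getNode(ISD::MUL, dl, VT, Add.getOperand(0), C);
    SDValue BC = DAG.getNode(ISD::MUL, dl, VT, Add.getOperand(1), C);
    return DAG.getNode(ISD::ADD, dl, VT, AC, BC);
  }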
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
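For illustration (not part of the original brief): the canonical two-result masks for a 4-element shuffle, with indices running over the concatenation of the two sources, are {0, 4, 2, 6} / {1, 5, 3, 7} for VTRN, {0, 2, 4, 6} / {1, 3, 5, 7} for VUZP, and {0, 4, 1, 5} / {2, 6, 3, 7} for VZIP; on a match the function also reports which of the two results the mask selects.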
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Module.h This file contains the declarations for the Module class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned logBase2() const
Definition: APInt.h:1703
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
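A small, self-contained illustration of a few of the APInt helpers listed above (not taken from the file being documented):

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void apintExamples() {
    APInt Mask = APInt::getLowBitsSet(32, 16); // 0x0000FFFF
    (void)Mask.popcount();     // 16 bits set
    (void)Mask.countr_one();   // 16 trailing ones
    (void)Mask.countl_zero();  // 16 leading zeros
    (void)Mask.getZExtValue(); // 0xFFFF
    (void)Mask.isPowerOf2();   // false: more than one bit is set
  }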
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:312
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:321
bool hasARMOps() const
Definition: ARMSubtarget.h:265
bool supportsTailCall() const
Definition: ARMSubtarget.h:399
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:298
bool hasVFP4Base() const
Definition: ARMSubtarget.h:273
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool useFPVFMx() const
Definition: ARMSubtarget.h:282
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:288
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:200
bool useSjLjEH() const
Definition: ARMSubtarget.h:287
bool isTargetDarwin() const
Definition: ARMSubtarget.h:300
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isTargetAndroid() const
Definition: ARMSubtarget.h:350
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:310
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:326
bool hasVFP3Base() const
Definition: ARMSubtarget.h:272
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:286
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:302
bool hasMinSize() const
Definition: ARMSubtarget.h:363
bool isTargetIOS() const
Definition: ARMSubtarget.h:301
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:267
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:433
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:303
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:276
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:304
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:407
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:401
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:331
bool isTargetLinux() const
Definition: ARMSubtarget.h:305
bool useFPVFMx16() const
Definition: ARMSubtarget.h:285
bool isMClass() const
Definition: ARMSubtarget.h:366
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:486
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:280
bool isTargetELF() const
Definition: ARMSubtarget.h:311
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:443
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two adds is IR-canonical.
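The equivalence is plain two's-complement arithmetic: xor x, -1 computes -x - 1, so sub y, (xor x, -1) is y - (-x - 1) = y + x + 1 = add (add x, 1), y.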
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
An instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
bool isFloatingPointOperation() const
Definition: Instructions.h:922
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:93
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
The address of a basic block.
Definition: Constants.h:889
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
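A sketch of the typical calling pattern for isConstantSplat (BVN names some BuildVectorSDNode and is an assumption of this example, not code from the file):

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs) &&
      SplatBitSize <= 64) {
    // SplatBits holds the smallest element value that replicates across the
    // vector; SplatUndef marks the bits contributed by undef lanes.
  }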
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1819
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1871
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:705
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getStackAlignment() const
Definition: DataLayout.h:271
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:332
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
unsigned getNumElements() const
Definition: DerivedTypes.h:582
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
arg_iterator arg_begin()
Definition: Function.h:818
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:666
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:215
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:569
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2153
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
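A small usage sketch of the IRBuilder helpers listed above (Builder and Val are assumptions of the example; it mirrors the kind of i64 splitting done when lowering 64-bit exclusive loads and stores, but is not the file's code):

  // Split a 64-bit value into its low and high 32-bit halves.
  Value *Lo = Builder.CreateTrunc(Val, Builder.getInt32Ty());
  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32),
                                  Builder.getInt32Ty());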
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
const BasicBlock * getParent() const
Definition: Instruction.h:152
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:585
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
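The MachineInstrBuilder methods above are chained off BuildMI (listed further below). A hedged sketch using the generic COPY opcode, assuming MBB, DL, TII, DstReg and SrcReg come from the surrounding code:
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBB.end(), DL, TII->get(TargetOpcode::COPY))
          .addDef(DstReg)                  // virtual register definition
          .addUse(SrcReg, RegState::Kill); // last use of SrcReg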
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
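A short sketch of allocating one of these memory operands for a 32-bit stack load, assuming MF and a frame index FI are available (both names are placeholders):
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI),
      MachineMemOperand::MOLoad,           // the access reads data
      LLT::scalar(32), Align(4));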
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if this node is an UNDEF node.
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
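A minimal sketch of the use-iteration API above, written as it would appear inside a combine helper that returns SDValue; N is assumed to be an SDNode*, and the bail-out condition is illustrative:
  // Give up unless every user of N is a single-use ISD::ADD node.
  for (SDNode *Use : N->uses())
    if (Use->getOpcode() != ISD::ADD || !Use->hasOneUse())
      return SDValue();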
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
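A minimal sketch of the node-construction helpers above, assuming a lowering routine where DAG, an SDLoc dl, and an i32 SDValue X are already in scope (all illustrative):
  SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
  SDValue Sum  = DAG.getNode(ISD::ADD, dl, MVT::i32, X, Zero);
  SDValue Not  = DAG.getNOT(dl, Sum, MVT::i32);      // (xor Sum, -1)
  SDValue Sel  = DAG.getSelectCC(dl, X, Zero, Sum, Not, ISD::SETEQ);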
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const unsigned char * bytes_end() const
Definition: StringRef.h:118
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const unsigned char * bytes_begin() const
Definition: StringRef.h:115
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
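These hooks are normally invoked from a target's TargetLowering constructor. A hedged sketch of that pattern; the particular types, actions, and alignments are illustrative, not ARM's actual configuration, and Subtarget is assumed to be the target's subtarget pointer:
  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setSchedulingPreference(Sched::Hybrid);
  setMinFunctionAlignment(Align(4));
  // Must run after all register classes have been added.
  computeRegisterProperties(Subtarget->getRegisterInfo());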
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
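A sketch of makeLibCall from the list above, as it might appear inside a TargetLowering member function when softening an f32 operation into a runtime-library call; Op, DAG, dl and Chain are assumed from the surrounding lowering code, and the libcall choice is illustrative:
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  std::pair<SDValue, SDValue> Res =
      makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl, Chain);
  SDValue Result   = Res.first;  // the call's return value
  SDValue OutChain = Res.second; // the updated chain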
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:398
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:662
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:495
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:629
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:252
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
Type * getArrayElementType() const
Definition: Type.h:404
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
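A small sketch of the encodability helpers above: a hypothetical predicate asking whether a 32-bit constant can be used directly as an ARM or Thumb-2 modified immediate (both helpers return -1 when the value is not encodable):
  static bool isCheapModifiedImm(unsigned Imm, bool IsThumb2) {
    return IsThumb2 ? ARM_AM::getT2SOImmVal(Imm) != -1
                    : ARM_AM::getSOImmVal(Imm) != -1;
  }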
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1005
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:498
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1009
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1028
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:151
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1032
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:886
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1054
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:759
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1023
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1000
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1218
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1360
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:106
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1481
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
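A sketch of the load predicates above, combined with the MemSDNode accessors listed earlier; N is assumed to be an SDNode*, and the fold condition is illustrative:
  bool Foldable = false;
  if (auto *Ld = dyn_cast<LoadSDNode>(N))
    Foldable = ISD::isNormalLoad(N) && Ld->isSimple() && Ld->hasOneUse();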
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:560
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
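A minimal sketch of the IR pattern matchers above; V is assumed to be a Value*, and the matched idiom (insertelement into undef at lane 0) is illustrative:
  Value *Scalar;
  if (match(V, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()))) {
    // V inserts Scalar into an undef vector at lane 0.
  }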
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
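Each RTLIB getter maps a type pair to a runtime-library call, or to UNKNOWN_LIBCALL when no such routine exists, and the lowering code checks that sentinel before building the call. A minimal sketch, with the surrounding lowering context assumed:
// Pick the soft-float routine for a signed f64 -> i32 conversion; the same
// shape applies to getFPTOUINT/getSINTTOFP/getUINTTOFP/getFPEXT/getFPROUND.
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  report_fatal_error("Unsupported FP_TO_SINT conversion");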
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
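The range helpers above (all_of, any_of, find, enumerate, ...) replace explicit begin/end loops throughout the shuffle-mask analysis in this file. A minimal sketch of the idiom, assuming Mask is an ArrayRef<int> in which -1 marks an undef lane:
// An identity shuffle: every defined lane i selects element i.
bool IsIdentity = llvm::all_of(llvm::enumerate(Mask), [](const auto &Lane) {
  return Lane.value() < 0 || Lane.value() == (int)Lane.index();
});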
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:251
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1541
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:263
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
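countr_zero, countr_one and isShiftedMask_32 together decompose a contiguous-ones immediate into a shift and a width, which is how bitfield-style operations (UBFX/BFI) are typically reasoned about. A minimal sketch under that assumption; the helper is illustrative only:
// Split e.g. 0x00000FF0 into Lsb = 4, Width = 8.
static bool decomposeShiftedMask(uint32_t Imm, unsigned &Lsb, unsigned &Width) {
  if (!llvm::isShiftedMask_32(Imm))
    return false;
  Lsb = llvm::countr_zero(Imm);           // trailing zeros = shift amount
  Width = llvm::countr_one(Imm >> Lsb);   // length of the run of ones
  return true;
}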
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1312
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
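Every CC_ARM_* and RetCC_ARM_* function above has the CCAssignFn shape and is driven through a CCState, which records one CCValAssign per value. A minimal sketch of how LowerCall-style code uses them; CallConv, isVarArg, MF, Outs and DAG are assumed to be in scope:
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Let the AAPCS-VFP rules decide register vs. stack for each outgoing value.
CCInfo.AnalyzeCallOperands(Outs, CC_ARM_AAPCS_VFP);
for (const CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // copy the value into VA.getLocReg()
  } else {
    // store it at stack offset VA.getLocMemOffset()
  }
}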
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
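ConstantMaterializationCost counts the MOV/MOVW/MOVT or literal-pool instructions needed for an immediate, and HasLowerConstantMaterializationCost compares two candidates; the typical question is whether the inverted constant is cheaper, so an MVN-style use can be folded. A minimal sketch, with Imm and Subtarget assumed:
unsigned CostImm = ConstantMaterializationCost(Imm, Subtarget, /*ForCodesize=*/true);
unsigned CostInv = ConstantMaterializationCost(~Imm, Subtarget, /*ForCodesize=*/true);
// Roughly the comparison HasLowerConstantMaterializationCost makes.
bool PreferInverted = CostInv < CostImm;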
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:244
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
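isConstOrConstSplat lets one code path handle both scalar constants and splat-vector constants; combined with ConstantSDNode queries it covers shapes like "xor X, all-ones", which is what isBitwiseNot tests. A minimal sketch, assuming an SDValue N that is being combined:
// Recognise a bitwise NOT (xor X, -1) for scalars and splat vectors alike.
bool IsNot = false;
if (N.getOpcode() == ISD::XOR)
  if (ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)))
    IsNot = C->isAllOnes();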
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
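predOps and condCodeOp append the predicate and S-bit operands that almost every ARM MachineInstr carries; t1CondCodeOp is the Thumb1 variant, where the flags register is always CPSR. A minimal sketch of the BuildMI idiom used throughout the backend, with MBB, MI, dl, TII, DestReg and SrcReg assumed:
// "mov DestReg, SrcReg" executed unconditionally (ARMCC::AL) and not setting
// flags (empty condition-code result operand).
BuildMI(MBB, MI, dl, TII->get(ARM::MOVr), DestReg)
    .addReg(SrcReg)
    .add(predOps(ARMCC::AL))
    .add(condCodeOp());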
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
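createSequentialMask and concatenateVectors are the IR-level building blocks used when lowering interleaved accesses: widen by concatenating the parts, then carve lanes back out with a sequential mask. A minimal sketch, assuming an IRBuilder named Builder and two <4 x i32> values Lo and Hi:
llvm::Value *Whole = llvm::concatenateVectors(Builder, {Lo, Hi}); // <8 x i32>
// Mask {0,1,2,3}: Start = 0, four sequential indices, no trailing undefs.
llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(0, 4, 0);
llvm::Value *LowHalf = Builder.CreateShuffleVector(Whole, Mask);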
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
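The EVT queries above drive most type-legality decisions in this file: checking width, switching between the floating-point and integer view of a vector, and splitting a vector in half when it does not fit a register. A minimal sketch, assuming an LLVMContext Ctx and a fixed-width vector type VT:
if (VT.isFixedLengthVector() && VT.isFloatingPoint() && VT.is128BitVector()) {
  EVT IntVT = VT.changeVectorElementTypeToInteger();  // e.g. v4f32 -> v4i32
  EVT HalfVT = IntVT.getHalfNumVectorElementsVT(Ctx); // v4i32 -> v2i32
  (void)HalfVT; // a real lowering would bitcast and split here
}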
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:297
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:777
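KnownBits is how computeKnownBitsForTargetNode communicates what it can prove about a custom node: arithmetic is propagated with helpers such as computeForAddSub, and results from different paths are merged with intersectWith. A minimal sketch, assuming KnownBits values for the operands of an addition on one arm of a select (LHSKnown, RHSKnown, OtherArmKnown):
// Propagate an addition, then keep only what both select arms agree on.
KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                            /*NUW=*/false, LHSKnown, RHSKnown);
KnownBits Known = Sum.intersectWith(OtherArmKnown);
// Known.Zero / Known.One now hold only facts valid on every path.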
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
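A MachinePointerInfo attaches a symbolic description of the accessed memory (fixed stack slot, constant pool, GOT, jump table, ...) to each load or store the lowering emits, so alias analysis and the scheduler can reorder safely. A minimal sketch, assuming DAG, Chain, dl, Val, Addr, MF and a frame index FI:
// Store Val four bytes into spill slot FI; the pointer info records exactly
// which stack object (and offset) is written.
MachinePointerInfo PtrInfo =
    MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(4);
SDValue Store = DAG.getStore(Chain, dl, Val, Addr, PtrInfo);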
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
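CallLoweringInfo is a builder: the lowering fills in the chain, callee, calling convention and result-extension flags, then hands it to LowerCallTo, which returns the call result and the updated chain. A minimal sketch of the libcall variant, assuming dl, Chain, RetTy, Callee, Args and isSigned are already prepared:
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
    .setSExtResult(isSigned)
    .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first is the returned value, CallResult.second the output chain.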
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)