LLVM 22.0.0git
ARMISelLowering.cpp
Go to the documentation of this file.
1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
66#include "llvm/IR/Attributes.h"
67#include "llvm/IR/CallingConv.h"
68#include "llvm/IR/Constant.h"
69#include "llvm/IR/Constants.h"
70#include "llvm/IR/DataLayout.h"
71#include "llvm/IR/DebugLoc.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/GlobalValue.h"
77#include "llvm/IR/IRBuilder.h"
78#include "llvm/IR/InlineAsm.h"
79#include "llvm/IR/Instruction.h"
82#include "llvm/IR/Intrinsics.h"
83#include "llvm/IR/IntrinsicsARM.h"
84#include "llvm/IR/Module.h"
85#include "llvm/IR/Type.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/MC/MCInstrDesc.h"
90#include "llvm/MC/MCSchedule.h"
97#include "llvm/Support/Debug.h"
105#include <algorithm>
106#include <cassert>
107#include <cstdint>
108#include <cstdlib>
109#include <iterator>
110#include <limits>
111#include <optional>
112#include <tuple>
113#include <utility>
114#include <vector>
115
116using namespace llvm;
117
118#define DEBUG_TYPE "arm-isel"
119
// Pass statistics, reported when the compiler is run with -stats.
120STATISTIC(NumTailCalls, "Number of tail calls");
121STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123STATISTIC(NumConstpoolPromoted,
124 "Number of constants with their storage promoted into constant pools");
125
// Debugging knob: allows disabling ARM/Thumb interworking support.
126static cl::opt<bool>
127ARMInterworking("arm-interworking", cl::Hidden,
128 cl::desc("Enable / disable ARM interworking (for debugging only)"),
129 cl::init(true));
130
// NOTE(review): the `static cl::opt<...> Name(` declaration heads for the
// following options are elided in this excerpt; only the option strings,
// descriptions and defaults are visible — confirm names against full source.
132 "arm-promote-constant", cl::Hidden,
133 cl::desc("Enable / disable promotion of unnamed_addr constants into "
134 "constant pools"),
135 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
137 "arm-promote-constant-max-size", cl::Hidden,
138 cl::desc("Maximum size of constant to promote into a constant pool"),
139 cl::init(64));
141 "arm-promote-constant-max-total", cl::Hidden,
142 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143 cl::init(128));
144
146MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
147 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
148 cl::init(2));
149
151 "arm-max-base-updates-to-check", cl::Hidden,
152 cl::desc("Maximum number of base-updates to check generating postindex."),
153 cl::init(64));
154
155/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
156constexpr MVT FlagsVT = MVT::i32;
157
158// The APCS parameter registers.
159static const MCPhysReg GPRArgRegs[] = {
160 ARM::R0, ARM::R1, ARM::R2, ARM::R3
161};
162
// Re-canonicalizes a sub-32-bit argument value held in an i32: truncate
// `Value` down to the argument's declared type (Arg.ArgVT, asserted to be
// narrower than i32), then extend it back to MVT::i32.
// NOTE(review): the function name/signature (line 163) and the extension
// opcode operand of DAG.getNode (line 169) are elided in this excerpt —
// presumably the extend kind (sign vs. zero) is chosen from the argument's
// flags; confirm against the full source.
164 SelectionDAG &DAG, const SDLoc &DL) {
166 assert(Arg.ArgVT.bitsLT(MVT::i32));
167 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
168 SDValue Ext =
170 MVT::i32, Trunc);
171 return Ext;
172}
173
/// Configure legalization for a NEON vector type `VT`. Loads/stores of VT are
/// promoted to `PromotedLdStVT` when the two differ, and a set of integer
/// min/max/abs/saturating ops is marked Legal for the integer vector types.
/// NOTE(review): several interior lines of this function are elided in this
/// excerpt (the original numbering skips, e.g. 185-187, 189-192, 194-206,
/// 208-210, 214-221, 225); do not assume the visible lines are contiguous.
174void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
175 if (VT != PromotedLdStVT) {
// Carry whole-vector loads/stores in the wider promoted type.
176 setOperationAction(ISD::LOAD, VT, Promote);
177 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
178
179 setOperationAction(ISD::STORE, VT, Promote);
180 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
181 }
182
183 MVT ElemTy = VT.getVectorElementType();
184 if (ElemTy != MVT::f64)
188 if (ElemTy == MVT::i32) {
193 } else {
198 }
207 if (VT.isInteger()) {
211 }
212
213 // Neon does not support vector divide/remainder operations.
222
// NEON has native abs/abd/min/max for integer vectors with elements
// narrower than 64 bits (i64 element types are excluded below).
223 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
224 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
226 setOperationAction(Opcode, VT, Legal);
// Saturating add/sub is legal on all NEON integer vector types.
227 if (!VT.isFloatingPoint())
228 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
229 setOperationAction(Opcode, VT, Legal);
230}
231
232void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
233 addRegisterClass(VT, &ARM::DPRRegClass);
234 addTypeForNEON(VT, MVT::f64);
235}
236
237void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
238 addRegisterClass(VT, &ARM::DPairRegClass);
239 addTypeForNEON(VT, MVT::v2f64);
240}
241
/// Mark every target-independent operation on `VT` as needing expansion,
/// then re-legalize the handful of trivially supportable ops (bitcast,
/// load, store) that work regardless of arithmetic support for the type.
/// NOTE(review): the body of the for loop (original line 244, presumably
/// `setOperationAction(Opc, VT, Expand);`) and line 252 are elided in this
/// excerpt — confirm against the full source.
242void ARMTargetLowering::setAllExpand(MVT VT) {
243 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
245
246 // We support these really simple operations even on types where all
247 // the actual arithmetic has to be broken down into simpler
248 // operations or turned into library calls.
249 setOperationAction(ISD::BITCAST, VT, Legal);
250 setOperationAction(ISD::LOAD, VT, Legal);
251 setOperationAction(ISD::STORE, VT, Legal);
253}
254
255void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
256 LegalizeAction Action) {
257 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
258 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
259 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
260}
261
/// Configure register classes and legalization actions for the MVE vector
/// types: 8/16/32-bit integer vectors, f16/f32 vectors, the 64-bit-element
/// "long" types (supported only to bitcast/load/store level), and the i1
/// predicate vector types. `HasMVEFP` selects whether FP data-processing
/// ops are made Legal or left Expanded.
/// NOTE(review): this excerpt elides many interior lines (the original
/// numbering skips, e.g. 267-279, 282-295, 298-306, 320-326, 332-335,
/// 346-357, 362-365, 418-422, 452-455, 463-470, 473-482, 485-502); do not
/// assume the visible lines are contiguous.
262void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
263 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
264
265 for (auto VT : IntTypes) {
266 addRegisterClass(VT, &ARM::MQPRRegClass);
// Masked loads need custom lowering; masked stores are natively legal.
280 setOperationAction(ISD::MLOAD, VT, Custom);
281 setOperationAction(ISD::MSTORE, VT, Legal);
296
297 // No native support for these.
307
308 // Vector reductions
309 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
310 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
311 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
312 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
313 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
314 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
315 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
316 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
317 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
318
319 if (!HasMVEFP) {
324 } else {
327 }
328
329 // Pre and Post inc are supported on loads and stores
330 for (unsigned im = (unsigned)ISD::PRE_INC;
331 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
336 }
337 }
338
339 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
340 for (auto VT : FloatTypes) {
341 addRegisterClass(VT, &ARM::MQPRRegClass);
// Without MVE.fp, default everything to Expand first; specific ops are
// re-legalized below.
342 if (!HasMVEFP)
343 setAllExpand(VT);
344
345 // These are legal or custom whether we have MVE.fp or not
354 setOperationAction(ISD::MLOAD, VT, Custom);
355 setOperationAction(ISD::MSTORE, VT, Legal);
358
359 // Pre and Post inc are supported on loads and stores
360 for (unsigned im = (unsigned)ISD::PRE_INC;
361 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
366 }
367
368 if (HasMVEFP) {
369 setOperationAction(ISD::FMINNUM, VT, Legal);
370 setOperationAction(ISD::FMAXNUM, VT, Legal);
371 setOperationAction(ISD::FROUND, VT, Legal);
372 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
373 setOperationAction(ISD::FRINT, VT, Legal);
374 setOperationAction(ISD::FTRUNC, VT, Legal);
375 setOperationAction(ISD::FFLOOR, VT, Legal);
376 setOperationAction(ISD::FCEIL, VT, Legal);
377 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
378 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
379 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
380 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
381
382 // No native support for these.
385 setOperationAction(ISD::FSQRT, VT, Expand);
386 setOperationAction(ISD::FSIN, VT, Expand);
387 setOperationAction(ISD::FCOS, VT, Expand);
388 setOperationAction(ISD::FTAN, VT, Expand);
389 setOperationAction(ISD::FPOW, VT, Expand);
390 setOperationAction(ISD::FLOG, VT, Expand);
391 setOperationAction(ISD::FLOG2, VT, Expand);
392 setOperationAction(ISD::FLOG10, VT, Expand);
393 setOperationAction(ISD::FEXP, VT, Expand);
394 setOperationAction(ISD::FEXP2, VT, Expand);
395 setOperationAction(ISD::FEXP10, VT, Expand);
396 setOperationAction(ISD::FNEARBYINT, VT, Expand);
397 }
398 }
399
400 // Custom Expand smaller than legal vector reductions to prevent false zero
401 // items being added.
402 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
403 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
404 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
405 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
406 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
407 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
408 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
409 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
410
411 // We 'support' these types up to bitcast/load/store level, regardless of
412 // MVE integer-only / float support. Only doing FP data processing on the FP
413 // vector types is inhibited at integer-only level.
414 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
415 for (auto VT : LongTypes) {
416 addRegisterClass(VT, &ARM::MQPRRegClass);
417 setAllExpand(VT);
423 }
425
426 // We can do bitwise operations on v2i64 vectors
427 setOperationAction(ISD::AND, MVT::v2i64, Legal);
428 setOperationAction(ISD::OR, MVT::v2i64, Legal);
429 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
430
431 // It is legal to extload from v4i8 to v4i16 or v4i32.
432 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
433 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
434 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
435
436 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
442
443 // Some truncating stores are legal too.
444 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
445 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
446 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
447
448 // Pre and Post inc on these are legal, given the correct extends
449 for (unsigned im = (unsigned)ISD::PRE_INC;
450 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
451 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
456 }
457 }
458
459 // Predicate types
460 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
461 for (auto VT : pTypes) {
462 addRegisterClass(VT, &ARM::VCCRRegClass);
// Predicate-vector loads and stores both need custom lowering.
471 setOperationAction(ISD::LOAD, VT, Custom);
472 setOperationAction(ISD::STORE, VT, Custom);
477
478 if (!HasMVEFP) {
483 }
484 }
488 setOperationAction(ISD::OR, MVT::v2i1, Expand);
494
503}
504
// Accessor returning the owning target machine downcast to the ARM-specific
// subclass. NOTE(review): the function signature (original line 505) is
// elided in this excerpt — confirm the name and return type against the
// full source.
506 return static_cast<const ARMBaseTargetMachine &>(getTargetMachine());
507}
508
510 const ARMSubtarget &STI)
511 : TargetLowering(TM_, STI), Subtarget(&STI),
512 RegInfo(Subtarget->getRegisterInfo()),
513 Itins(Subtarget->getInstrItineraryData()) {
514 const auto &TM = static_cast<const ARMBaseTargetMachine &>(TM_);
515
518
519 const Triple &TT = TM.getTargetTriple();
520
521 if (Subtarget->isThumb1Only())
522 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
523 else
524 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
525
526 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
527 Subtarget->hasFPRegs()) {
528 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
529 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
530
535
536 if (!Subtarget->hasVFP2Base()) {
537 setAllExpand(MVT::f32);
538 } else {
541 setOperationAction(Op, MVT::f32, Legal);
542 }
543 if (!Subtarget->hasFP64()) {
544 setAllExpand(MVT::f64);
545 } else {
548 setOperationAction(Op, MVT::f64, Legal);
549 }
550 }
551
552 if (Subtarget->hasFullFP16()) {
553 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
554 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
555 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
556
557 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
558 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
559 }
560
561 if (Subtarget->hasBF16()) {
562 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
563 setAllExpand(MVT::bf16);
564 if (!Subtarget->hasFullFP16())
565 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
566 } else {
567 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
568 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);
569 setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
570 setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);
571 }
572
574 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
575 setTruncStoreAction(VT, InnerVT, Expand);
576 addAllExtLoads(VT, InnerVT, Expand);
577 }
578
581
583 }
584
585 if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
587
588 if (!Subtarget->hasV8_1MMainlineOps())
590
591 if (!Subtarget->isThumb1Only())
593
596
599
600 if (Subtarget->hasMVEIntegerOps())
601 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
602
603 // Combine low-overhead loop intrinsics so that we can lower i1 types.
604 if (Subtarget->hasLOB()) {
605 setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
606 }
607
608 if (Subtarget->hasNEON()) {
609 addDRTypeForNEON(MVT::v2f32);
610 addDRTypeForNEON(MVT::v8i8);
611 addDRTypeForNEON(MVT::v4i16);
612 addDRTypeForNEON(MVT::v2i32);
613 addDRTypeForNEON(MVT::v1i64);
614
615 addQRTypeForNEON(MVT::v4f32);
616 addQRTypeForNEON(MVT::v2f64);
617 addQRTypeForNEON(MVT::v16i8);
618 addQRTypeForNEON(MVT::v8i16);
619 addQRTypeForNEON(MVT::v4i32);
620 addQRTypeForNEON(MVT::v2i64);
621
622 if (Subtarget->hasFullFP16()) {
623 addQRTypeForNEON(MVT::v8f16);
624 addDRTypeForNEON(MVT::v4f16);
625 }
626
627 if (Subtarget->hasBF16()) {
628 addQRTypeForNEON(MVT::v8bf16);
629 addDRTypeForNEON(MVT::v4bf16);
630 }
631 }
632
633 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
634 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
635 // none of Neon, MVE or VFP supports any arithmetic operations on it.
636 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
637 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
638 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
639 // FIXME: Code duplication: FDIV and FREM are expanded always, see
640 // ARMTargetLowering::addTypeForNEON method for details.
641 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
642 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
643 // FIXME: Create unittest.
644 // In another words, find a way when "copysign" appears in DAG with vector
645 // operands.
647 // FIXME: Code duplication: SETCC has custom operation action, see
648 // ARMTargetLowering::addTypeForNEON method for details.
650 // FIXME: Create unittest for FNEG and for FABS.
651 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
652 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
653 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
654 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
655 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
656 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
657 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
658 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
659 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
660 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
661 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
662 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
663 setOperationAction(ISD::FEXP10, MVT::v2f64, Expand);
664 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
665 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
666 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
667 setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Expand);
668 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
669 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
670 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
671 }
672
673 if (Subtarget->hasNEON()) {
674 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
675 // supported for v4f32.
676 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
677 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
678 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
679 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
680 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
681 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
682 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
683 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
684 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
685 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
686 setOperationAction(ISD::FEXP10, MVT::v4f32, Expand);
687 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
688 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
689 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
690 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Expand);
691 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
692 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
693
694 // Mark v2f32 intrinsics.
695 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
696 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
697 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
698 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
699 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
700 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
701 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
702 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
703 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
704 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
705 setOperationAction(ISD::FEXP10, MVT::v2f32, Expand);
706 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
707 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
708 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
709 setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Expand);
710 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
711 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
712
713 for (ISD::NodeType Op : {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
714 ISD::FRINT, ISD::FTRUNC, ISD::FROUNDEVEN}) {
715 setOperationAction(Op, MVT::v4f16, Expand);
716 setOperationAction(Op, MVT::v8f16, Expand);
717 }
718
719 // Neon does not support some operations on v1i64 and v2i64 types.
720 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
721 // Custom handling for some quad-vector types to detect VMULL.
722 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
723 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
724 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
725 // Custom handling for some vector types to avoid expensive expansions
726 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
728 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
730 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
731 // a destination type that is wider than the source, and nor does
732 // it have a FP_TO_[SU]INT instruction with a narrower destination than
733 // source.
742
744 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
745
746 // NEON does not have single instruction CTPOP for vectors with element
747 // types wider than 8-bits. However, custom lowering can leverage the
748 // v8i8/v16i8 vcnt instruction.
755
756 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
757 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
758
759 // NEON does not have single instruction CTTZ for vectors.
761 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
762 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
763 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
764
765 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
766 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
767 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
768 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
769
774
779
783 }
784
785 // NEON only has FMA instructions as of VFP4.
786 if (!Subtarget->hasVFP4Base()) {
787 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
788 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
789 }
790
792 ISD::FP_TO_UINT, ISD::FMUL, ISD::LOAD});
793
794 // It is legal to extload from v4i8 to v4i16 or v4i32.
795 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
796 MVT::v2i32}) {
801 }
802 }
803
804 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
805 MVT::v4i32}) {
806 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
807 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
808 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
809 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
810 }
811 }
812
813 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
819 ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
820 }
821 if (Subtarget->hasMVEIntegerOps()) {
823 ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
824 ISD::SETCC});
825 }
826 if (Subtarget->hasMVEFloatOps()) {
828 }
829
830 if (!Subtarget->hasFP64()) {
831 // When targeting a floating-point unit with only single-precision
832 // operations, f64 is legal for the few double-precision instructions which
833 // are present However, no double-precision operations other than moves,
834 // loads and stores are provided by the hardware.
843 setOperationAction(ISD::FNEG, MVT::f64, Expand);
844 setOperationAction(ISD::FABS, MVT::f64, Expand);
845 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
846 setOperationAction(ISD::FSIN, MVT::f64, Expand);
847 setOperationAction(ISD::FCOS, MVT::f64, Expand);
848 setOperationAction(ISD::FPOW, MVT::f64, Expand);
849 setOperationAction(ISD::FLOG, MVT::f64, Expand);
850 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
851 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
852 setOperationAction(ISD::FEXP, MVT::f64, Expand);
853 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
854 setOperationAction(ISD::FEXP10, MVT::f64, Expand);
855 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
856 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
857 setOperationAction(ISD::FRINT, MVT::f64, Expand);
858 setOperationAction(ISD::FROUNDEVEN, MVT::f64, Expand);
859 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
860 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
873 }
874
875 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
876 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
878 if (Subtarget->hasFullFP16()) {
881 }
882 }
883
884 if (!Subtarget->hasFP16()) {
885 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
887 }
888
889 computeRegisterProperties(Subtarget->getRegisterInfo());
890
891 // ARM does not have floating-point extending loads.
892 for (MVT VT : MVT::fp_valuetypes()) {
893 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
894 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
896 }
897
898 // ... or truncating stores
899 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
900 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
901 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
902 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
903 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
904
905 // ARM does not have i1 sign extending load.
906 for (MVT VT : MVT::integer_valuetypes())
908
909 // ARM supports all 4 flavors of integer indexed load / store.
910 if (!Subtarget->isThumb1Only()) {
911 for (unsigned im = (unsigned)ISD::PRE_INC;
913 setIndexedLoadAction(im, MVT::i1, Legal);
914 setIndexedLoadAction(im, MVT::i8, Legal);
915 setIndexedLoadAction(im, MVT::i16, Legal);
916 setIndexedLoadAction(im, MVT::i32, Legal);
917 setIndexedStoreAction(im, MVT::i1, Legal);
918 setIndexedStoreAction(im, MVT::i8, Legal);
919 setIndexedStoreAction(im, MVT::i16, Legal);
920 setIndexedStoreAction(im, MVT::i32, Legal);
921 }
922 } else {
923 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
926 }
927
932
935 if (Subtarget->hasDSP()) {
944 }
945 if (Subtarget->hasBaseDSP()) {
948 }
949
950 // i64 operation support.
953 if (Subtarget->isThumb1Only()) {
956 }
957 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
958 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
960
968 setOperationAction(ISD::LOAD, MVT::i64, Custom);
969 setOperationAction(ISD::STORE, MVT::i64, Custom);
970
971 // MVE lowers 64 bit shifts to lsll and lsrl
972 // assuming that ISD::SRL and SRA of i64 are already marked custom
973 if (Subtarget->hasMVEIntegerOps())
975
976 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
977 if (Subtarget->isThumb1Only()) {
981 }
982
983 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
985
986 // ARM does not have ROTL.
991 }
993 // TODO: These two should be set to LibCall, but this currently breaks
994 // the Linux kernel build. See #101786.
997 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1000 }
1001
1002 // @llvm.readcyclecounter requires the Performance Monitors extension.
1003 // Default to the 0 expansion on unsupported platforms.
1004 // FIXME: Technically there are older ARM CPUs that have
1005 // implementation-specific ways of obtaining this information.
1006 if (Subtarget->hasPerfMon())
1007 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1008
1009 // Only ARMv6 has BSWAP.
1010 if (!Subtarget->hasV6Ops())
1012
1013 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1014 : Subtarget->hasDivideInARMMode();
1015 if (!hasDivide) {
1016 // These are expanded into libcalls if the cpu doesn't have HW divider.
1019 }
1020
1021 if (TT.isOSWindows() && !Subtarget->hasDivideInThumbMode()) {
1024
1027 }
1028
1031
1032 // Register based DivRem for AEABI (RTABI 4.2)
1033 if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
1034 TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {
1037 HasStandaloneRem = false;
1038
1043 } else {
1046 }
1047
1052
1053 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1054 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1055
1056 // Use the default implementation.
1057 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1058 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1059 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1060 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1061 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1062 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1063
1064 if (TT.isOSWindows())
1065 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1066 else
1067 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
1068
1069 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1070 // the default expansion.
1071 InsertFencesForAtomic = false;
1072 if (Subtarget->hasAnyDataBarrier() &&
1073 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1074 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1075 // to ldrex/strex loops already.
1076 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1077 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1078 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1079
1080 // On v8, we have particularly efficient implementations of atomic fences
1081 // if they can be combined with nearby atomic loads and stores.
1082 if (!Subtarget->hasAcquireRelease() ||
1083 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1084 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1085 InsertFencesForAtomic = true;
1086 }
1087 } else {
1088 // If there's anything we can use as a barrier, go through custom lowering
1089 // for ATOMIC_FENCE.
1090 // If target has DMB in thumb, Fences can be inserted.
1091 if (Subtarget->hasDataBarrier())
1092 InsertFencesForAtomic = true;
1093
1094 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1095 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1096
1097 // Set them all for libcall, which will force libcalls.
1098 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
1099 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
1100 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
1101 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
1102 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
1103 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
1104 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
1105 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
1106 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
1107 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
1108 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
1109 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
1110 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1111 // Unordered/Monotonic case.
1112 if (!InsertFencesForAtomic) {
1113 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1114 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1115 }
1116 }
1117
1118 // Compute supported atomic widths.
1119 if (TT.isOSLinux() || (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1120 // For targets where __sync_* routines are reliably available, we use them
1121 // if necessary.
1122 //
1123 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1124 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1125 //
1126 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1127 // such targets should provide __sync_* routines, which use the ARM mode
1128 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1129 // encoding; see ARMISD::MEMBARRIER_MCR.)
1131 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1132 Subtarget->hasForced32BitAtomics()) {
1133 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1135 } else {
1136 // We can't assume anything about other targets; just use libatomic
1137 // routines.
1139 }
1140
1142
1143 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1144
1145 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1146 if (!Subtarget->hasV6Ops()) {
1149 }
1151
1152 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1153 !Subtarget->isThumb1Only()) {
1154 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1155 // iff target supports vfp2.
1156 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1158 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1159 setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);
1160 setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);
1161 setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);
1162 setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
1163 setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
1164 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
1165 }
1166
1167 // We want to custom lower some of our intrinsics.
1172
1182 if (Subtarget->hasFullFP16()) {
1186 }
1187
1189
1190 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1191 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1192 if (Subtarget->hasFullFP16())
1193 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1194 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1195 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1196 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1197
1198 // We don't support sin/cos/fmod/copysign/pow
1199 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1200 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1201 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1202 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1203 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1204 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1207 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1208 !Subtarget->isThumb1Only()) {
1211 }
1212 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1213 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1214
1215 if (!Subtarget->hasVFP4Base()) {
1218 }
1219
1220 // Various VFP goodness
1221 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1222 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1223 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1224 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1225 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1226 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
1227 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
1228 }
1229
1230 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1231 if (!Subtarget->hasFP16()) {
1232 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1233 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1234 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
1235 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
1236 }
1237
1238 // Strict floating-point comparisons need custom lowering.
1245 }
1246
1247 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1248 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1249
1250 // FP-ARMv8 implements a lot of rounding-like FP operations.
1251 if (Subtarget->hasFPARMv8Base()) {
1252 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1253 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1254 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1255 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1256 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1257 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1258 setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
1259 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1260 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1261 if (Subtarget->hasNEON()) {
1262 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1263 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1264 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1265 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1266 }
1267
1268 if (Subtarget->hasFP64()) {
1269 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1270 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1271 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1272 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1273 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1274 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1275 setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
1276 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1277 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1278 }
1279 }
1280
1281 // FP16 often need to be promoted to call lib functions
1282 // clang-format off
1283 if (Subtarget->hasFullFP16()) {
1284 setOperationAction(ISD::LRINT, MVT::f16, Expand);
1285 setOperationAction(ISD::LROUND, MVT::f16, Expand);
1287
1288 for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
1289 ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
1290 ISD::FSINCOSPI, ISD::FMODF, ISD::FACOS,
1291 ISD::FASIN, ISD::FATAN, ISD::FATAN2,
1292 ISD::FCOSH, ISD::FSINH, ISD::FTANH,
1293 ISD::FTAN, ISD::FEXP, ISD::FEXP2,
1294 ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
1295 ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
1302 setOperationAction(Op, MVT::f16, Promote);
1303 }
1304
1305 // Round-to-integer need custom lowering for fp16, as Promote doesn't work
1306 // because the result type is integer.
1308 setOperationAction(Op, MVT::f16, Custom);
1309
1310 for (auto Op : {ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC,
1311 ISD::FNEARBYINT, ISD::FRINT, ISD::FFLOOR,
1315 setOperationAction(Op, MVT::f16, Legal);
1316 }
1317 // clang-format on
1318 }
1319
1320 if (Subtarget->hasNEON()) {
1321 // vmin and vmax aren't available in a scalar form, so we can use
1322 // a NEON instruction with an undef lane instead.
1323 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1324 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1325 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1326 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1327 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1328 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1329 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1330 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1331
1332 if (Subtarget->hasV8Ops()) {
1333 setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
1334 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1335 setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
1336 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1337 setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);
1338 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
1339 setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
1340 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
1341 setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
1342 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1343 setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
1344 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1345 }
1346
1347 if (Subtarget->hasFullFP16()) {
1348 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1349 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1350 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1351 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1352
1353 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1354 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1355 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1356 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1357
1358 setOperationAction(ISD::FFLOOR, MVT::v4f16, Legal);
1359 setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
1360 setOperationAction(ISD::FROUND, MVT::v4f16, Legal);
1361 setOperationAction(ISD::FROUND, MVT::v8f16, Legal);
1362 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Legal);
1363 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Legal);
1364 setOperationAction(ISD::FCEIL, MVT::v4f16, Legal);
1365 setOperationAction(ISD::FCEIL, MVT::v8f16, Legal);
1366 setOperationAction(ISD::FTRUNC, MVT::v4f16, Legal);
1367 setOperationAction(ISD::FTRUNC, MVT::v8f16, Legal);
1368 setOperationAction(ISD::FRINT, MVT::v4f16, Legal);
1369 setOperationAction(ISD::FRINT, MVT::v8f16, Legal);
1370 }
1371 }
1372
1373 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1374 // it, but it's just a wrapper around ldexp.
1375 if (TT.isOSWindows()) {
1376 for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1377 if (isOperationExpand(Op, MVT::f32))
1378 setOperationAction(Op, MVT::f32, Promote);
1379 }
1380
1381 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1382 // isn't legal.
1383 for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1384 if (isOperationExpand(Op, MVT::f16))
1385 setOperationAction(Op, MVT::f16, Promote);
1386
1387 // We have target-specific dag combine patterns for the following nodes:
1388 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1391
1392 if (Subtarget->hasMVEIntegerOps())
1394
1395 if (Subtarget->hasV6Ops())
1397 if (Subtarget->isThumb1Only())
1399 // Attempt to lower smin/smax to ssat/usat
1400 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1401 Subtarget->isThumb2()) {
1403 }
1404
1406
1407 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1408 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1410 else
1412
1413 //// temporary - rewrite interface to use type
1416 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1418 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1420
1421 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1422 // are at least 4 bytes aligned.
1424
1425 // Prefer likely predicted branches to selects on out-of-order cores.
1426 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1427
1428 setPrefLoopAlignment(Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1430 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1431
1432 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1433}
1434
// Forwards the subtarget's soft-float setting; when true, floating-point
// operations are lowered to library calls.
// NOTE(review): the signature line is elided in this extraction — presumably
// ARMTargetLowering::useSoftFloat(); confirm against the full file.
1436 return Subtarget->useSoftFloat();
1437}
1438
// True only on non-Thumb1 targets for value types of at most 32 bits.
// NOTE(review): the enclosing signature is elided in this extraction —
// confirm which TargetLowering hook this implements before relying on
// this description.
1440 return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
1441}
1442
1443// FIXME: It might make sense to define the representative register class as the
1444// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1445// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
1446// SPR's representative would be DPR_VFP2. This should work well if register
1447// pressure tracking were modified such that a register use would increment the
1448// pressure of the register class's representative and all of it's super
1449// classes' representatives transitively. We have not implemented this because
1450// of the difficulty prior to coalescing of modeling operand register classes
1451// due to the common occurrence of cross class copies and subregister insertions
1452// and extractions.
// Return a "representative" register class for VT together with a relative
// register-pressure cost, where 1 corresponds to one DPR's worth of pressure.
// NOTE(review): the method-name line is elided in this extraction; the return
// type and the switch over VT.SimpleTy match
// ARMTargetLowering::findRepresentativeClass — confirm against the full file.
1453std::pair<const TargetRegisterClass *, uint8_t>
1455 MVT VT) const {
1456 const TargetRegisterClass *RRC = nullptr;
1457 uint8_t Cost = 1;
1458 switch (VT.SimpleTy) {
1459 default:
1461 // Use DPR as representative register class for all floating point
1462 // and vector types. Since there are 32 SPR registers and 32 DPR registers so
1463 // the cost is 1 for both f32 and f64.
1464 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1465 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1466 RRC = &ARM::DPRRegClass;
1467 // When NEON is used for SP, only half of the register file is available
1468 // because operations that define both SP and DP results will be constrained
1469 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1470 // coalescing by double-counting the SP regs. See the FIXME above.
1471 if (Subtarget->useNEONForSinglePrecisionFP())
1472 Cost = 2;
1473 break;
// 128-bit vectors occupy a Q register, i.e. two consecutive DPRs.
1474 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1475 case MVT::v4f32: case MVT::v2f64:
1476 RRC = &ARM::DPRRegClass;
1477 Cost = 2;
1478 break;
// v4i64/v8i64 are register tuples spanning four / eight DPRs respectively.
1479 case MVT::v4i64:
1480 RRC = &ARM::DPRRegClass;
1481 Cost = 4;
1482 break;
1483 case MVT::v8i64:
1484 RRC = &ARM::DPRRegClass;
1485 Cost = 8;
1486 break;
1487 }
1488 return std::make_pair(RRC, Cost);
1489}
1490
// Return the type produced by a setcc for operands of type VT: scalar
// compares yield a pointer-sized integer; MVE vector compares yield a vector
// of i1 with the same element count (the MVE predicate register).
// NOTE(review): the signature-start line and the final fallback return are
// elided in this extraction — this matches
// ARMTargetLowering::getSetCCResultType; confirm against the full file.
1492 EVT VT) const {
1493 if (!VT.isVector())
1494 return getPointerTy(DL);
1495
1496 // MVE has a predicate register.
1497 if ((Subtarget->hasMVEIntegerOps() &&
1498 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1499 VT == MVT::v16i8)) ||
1500 (Subtarget->hasMVEFloatOps() &&
1501 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1502 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1504}
1505
1506/// getRegClassFor - Return the register class that should be used for the
1507/// specified value type.
/// NOTE(review): the default return for other VTs is elided in this
/// extraction (presumably it defers to the base TargetLowering).
1508const TargetRegisterClass *
1509ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// Divergence is irrelevant on ARM; parameter exists for the generic hook.
1510 (void)isDivergent;
1511 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1512 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1513 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1514 // MVE Q registers.
1515 if (Subtarget->hasNEON()) {
1516 if (VT == MVT::v4i64)
1517 return &ARM::QQPRRegClass;
1518 if (VT == MVT::v8i64)
1519 return &ARM::QQQQPRRegClass;
1520 }
1521 if (Subtarget->hasMVEIntegerOps()) {
1522 if (VT == MVT::v4i64)
1523 return &ARM::MQQPRRegClass;
1524 if (VT == MVT::v8i64)
1525 return &ARM::MQQQQPRRegClass;
1526 }
1528}
1529
1530// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
1531// source/dest is aligned and the copy size is large enough. We therefore want
1532// to align such objects passed to memory intrinsics.
// Returns true (and sets MinSize / PrefAlign) only for memory-intrinsic call
// sites; all other calls are left alone.
// NOTE(review): the signature-start line is elided in this extraction —
// presumably ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, ...).
1534 Align &PrefAlign) const {
1535 if (!isa<MemIntrinsic>(CI))
1536 return false;
1537 MinSize = 8;
1538 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1539 // cycle faster than 4-byte aligned LDM.
1540 PrefAlign =
1541 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1542 return true;
1543}
1544
1545// Create a fast isel object.
// Delegates to the ARM-specific FastISel factory; ownership of the returned
// object passes to the caller.
// NOTE(review): the line carrying the method name and first parameter is
// elided in this extraction.
1546FastISel *
1548 const TargetLibraryInfo *libInfo) const {
1549 return ARM::createFastISel(funcInfo, libInfo);
1550}
1551
// Pick a scheduling preference for node N: ILP when N produces FP/vector
// values or is a high-latency machine instruction, register pressure
// otherwise.
// NOTE(review): the signature line is elided in this extraction — presumably
// ARMTargetLowering::getSchedulingPreference(SDNode *N).
1553 unsigned NumVals = N->getNumValues();
1554 if (!NumVals)
1555 return Sched::RegPressure;
1556
// Any FP or vector result favors ILP scheduling.
1557 for (unsigned i = 0; i != NumVals; ++i) {
1558 EVT VT = N->getValueType(i);
1559 if (VT == MVT::Glue || VT == MVT::Other)
1560 continue;
1561 if (VT.isFloatingPoint() || VT.isVector())
1562 return Sched::ILP;
1563 }
1564
1565 if (!N->isMachineOpcode())
1566 return Sched::RegPressure;
1567
1568 // Loads are scheduled for latency even if the instruction itinerary
1569 // is not available.
1570 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1571 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1572
1573 if (MCID.getNumDefs() == 0)
1574 return Sched::RegPressure;
1575 if (!Itins->isEmpty() &&
// Operand cycle > 2 for the first def indicates a high-latency producer.
1576 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1577 return Sched::ILP;
1578
1579 return Sched::RegPressure;
1580}
1581
1582//===----------------------------------------------------------------------===//
1583// Lowering Code
1584//===----------------------------------------------------------------------===//
1585
1586static bool isSRL16(const SDValue &Op) {
1587 if (Op.getOpcode() != ISD::SRL)
1588 return false;
1589 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1590 return Const->getZExtValue() == 16;
1591 return false;
1592}
1593
1594static bool isSRA16(const SDValue &Op) {
1595 if (Op.getOpcode() != ISD::SRA)
1596 return false;
1597 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1598 return Const->getZExtValue() == 16;
1599 return false;
1600}
1601
1602static bool isSHL16(const SDValue &Op) {
1603 if (Op.getOpcode() != ISD::SHL)
1604 return false;
1605 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1606 return Const->getZExtValue() == 16;
1607 return false;
1608}
1609
1610// Check for a signed 16-bit value. We special case SRA because it makes it
1611// more simple when also looking for SRAs that aren't sign extending a
1612// smaller value. Without the check, we'd need to take extra care with
1613// checking order for some operations.
1614static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1615 if (isSRA16(Op))
1616 return isSHL16(Op.getOperand(0));
1617 return DAG.ComputeNumSignBits(Op) == 17;
1618}
1619
1620/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
/// Unsigned comparisons map to the carry-based predicates HI/HS/LO/LS.
/// NOTE(review): the function signature line is elided in this extraction.
1622 switch (CC) {
1623 default: llvm_unreachable("Unknown condition code!");
1624 case ISD::SETNE: return ARMCC::NE;
1625 case ISD::SETEQ: return ARMCC::EQ;
1626 case ISD::SETGT: return ARMCC::GT;
1627 case ISD::SETGE: return ARMCC::GE;
1628 case ISD::SETLT: return ARMCC::LT;
1629 case ISD::SETLE: return ARMCC::LE;
1630 case ISD::SETUGT: return ARMCC::HI;
1631 case ISD::SETUGE: return ARMCC::HS;
1632 case ISD::SETULT: return ARMCC::LO;
1633 case ISD::SETULE: return ARMCC::LS;
1634 }
1635}
1636
1637/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
/// Some comparisons need two ARM predicates (the result is CondCode OR'd with
/// CondCode2); CondCode2 is left as ARMCC::AL when one predicate suffices.
/// NOTE(review): the signature-start line is elided in this extraction.
1639 ARMCC::CondCodes &CondCode2) {
1640 CondCode2 = ARMCC::AL;
1641 switch (CC) {
1642 default: llvm_unreachable("Unknown FP condition!");
1643 case ISD::SETEQ:
1644 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1645 case ISD::SETGT:
1646 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1647 case ISD::SETGE:
1648 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1649 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1650 case ISD::SETOLE: CondCode = ARMCC::LS; break;
// SETONE is "ordered and not equal": less-than (MI) or greater-than (GT).
1651 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1652 case ISD::SETO: CondCode = ARMCC::VC; break;
1653 case ISD::SETUO: CondCode = ARMCC::VS; break;
// SETUEQ is "unordered or equal": equal (EQ) or unordered (VS).
1654 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1655 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1656 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1657 case ISD::SETLT:
1658 case ISD::SETULT: CondCode = ARMCC::LT; break;
1659 case ISD::SETLE:
1660 case ISD::SETULE: CondCode = ARMCC::LE; break;
1661 case ISD::SETNE:
1662 case ISD::SETUNE: CondCode = ARMCC::NE; break;
1663 }
1664}
1665
1666//===----------------------------------------------------------------------===//
1667// Calling Convention Implementation
1668//===----------------------------------------------------------------------===//
1669
1670/// getEffectiveCallingConv - Get the effective calling convention, taking into
1671/// account presence of floating point hardware and calling convention
1672/// limitations, such as support for variadic functions.
/// NOTE(review): several case labels and return statements of this switch are
/// elided in this extraction — confirm the full mapping against the original
/// file before editing.
1674ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1675 bool isVarArg) const {
1676 switch (CC) {
1677 default:
1678 report_fatal_error("Unsupported calling convention");
1681 case CallingConv::GHC:
1683 return CC;
1689 case CallingConv::Swift:
1692 case CallingConv::C:
1693 case CallingConv::Tail:
// Pre-AAPCS targets always use the legacy APCS convention; otherwise
// hard-float AAPCS_VFP is used when FP registers exist, we are not
// Thumb1-only, the float ABI is hard, and the function is not variadic.
1694 if (!getTM().isAAPCS_ABI())
1695 return CallingConv::ARM_APCS;
1696 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
1697 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1698 !isVarArg)
1700 else
1702 case CallingConv::Fast:
// Fast CC only survives when VFP2 is available, not Thumb1-only, and the
// call is not variadic; otherwise fall back to the platform default.
1704 if (!getTM().isAAPCS_ABI()) {
1705 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1706 return CallingConv::Fast;
1707 return CallingConv::ARM_APCS;
1708 } else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1709 !isVarArg)
1711 else
1713 }
1714}
1715
// Select the argument-assignment function for a call with convention CC
// (Return = false). NOTE(review): the signature-start line is elided in this
// extraction — presumably ARMTargetLowering::CCAssignFnForCall.
1717 bool isVarArg) const {
1718 return CCAssignFnForNode(CC, false, isVarArg);
1719}
1720
// Select the return-value-assignment function for convention CC
// (Return = true). NOTE(review): the signature-start line is elided in this
// extraction — presumably ARMTargetLowering::CCAssignFnForReturn.
1722 bool isVarArg) const {
1723 return CCAssignFnForNode(CC, true, isVarArg);
1724}
1725
1726/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1727/// CallingConvention.
/// \param Return  true selects the return-value table, false the argument
///                table, for the convention resolved by
///                getEffectiveCallingConv.
/// NOTE(review): several case labels of this switch are elided in this
/// extraction — confirm against the original file.
1728CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1729 bool Return,
1730 bool isVarArg) const {
1731 switch (getEffectiveCallingConv(CC, isVarArg)) {
1732 default:
1733 report_fatal_error("Unsupported calling convention");
1735 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1737 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1739 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1740 case CallingConv::Fast:
1741 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1742 case CallingConv::GHC:
1743 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1745 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1747 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1749 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
1750 }
1751}
1752
1753SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
1754 MVT LocVT, MVT ValVT, SDValue Val) const {
1755 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
1756 Val);
1757 if (Subtarget->hasFullFP16()) {
1758 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
1759 } else {
1760 Val = DAG.getNode(ISD::TRUNCATE, dl,
1761 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
1762 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
1763 }
1764 return Val;
1765}
1766
1767SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
1768 MVT LocVT, MVT ValVT,
1769 SDValue Val) const {
1770 if (Subtarget->hasFullFP16()) {
1771 Val = DAG.getNode(ARMISD::VMOVrh, dl,
1772 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
1773 } else {
1774 Val = DAG.getNode(ISD::BITCAST, dl,
1775 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
1776 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
1777 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
1778 }
1779 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
1780}
1781
1782/// LowerCallResult - Lower the result values of a call into the
1783/// appropriate copies out of appropriate physical registers.
/// \param InGlue       Glue from the call node, threaded through each
///                     CopyFromReg so the copies stay adjacent to the call.
/// \param isThisReturn When true, the first result is the forwarded 'this'
///                     pointer and is taken from ThisVal instead of a physreg.
/// \param isCmseNSCall When true, sub-32-bit integer results are re-extended
///                     after the call (CMSE hardening).
/// \returns the updated chain; result values are appended to InVals.
/// NOTE(review): the declaration line for RVLocs is elided in this
/// extraction.
1784SDValue ARMTargetLowering::LowerCallResult(
1785 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1786 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1787 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1788 SDValue ThisVal, bool isCmseNSCall) const {
1789 // Assign locations to each value returned by this call.
1791 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1792 *DAG.getContext());
1793 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1794
1795 // Copy all of the result registers out of their specified physreg.
1796 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1797 CCValAssign VA = RVLocs[i];
1798
1799 // Pass 'this' value directly from the argument to return value, to avoid
1800 // reg unit interference
1801 if (i == 0 && isThisReturn) {
1802 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1803 "unexpected return calling convention register assignment");
1804 InVals.push_back(ThisVal);
1805 continue;
1806 }
1807
1808 SDValue Val;
1809 if (VA.needsCustom() &&
1810 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
1811 // Handle f64 or half of a v2f64.
// Each f64 comes back as two i32 halves in consecutive locations; combine
// them with VMOVDRR, swapping for big-endian targets.
1812 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1813 InGlue);
1814 Chain = Lo.getValue(1);
1815 InGlue = Lo.getValue(2);
1816 VA = RVLocs[++i]; // skip ahead to next loc
1817 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1818 InGlue);
1819 Chain = Hi.getValue(1);
1820 InGlue = Hi.getValue(2);
1821 if (!Subtarget->isLittle())
1822 std::swap (Lo, Hi);
1823 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1824
// A v2f64 result consumes four i32 locations: build each f64 lane and
// insert it into the vector.
1825 if (VA.getLocVT() == MVT::v2f64) {
1826 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1827 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1828 DAG.getConstant(0, dl, MVT::i32));
1829
1830 VA = RVLocs[++i]; // skip ahead to next loc
1831 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
1832 Chain = Lo.getValue(1);
1833 InGlue = Lo.getValue(2);
1834 VA = RVLocs[++i]; // skip ahead to next loc
1835 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
1836 Chain = Hi.getValue(1);
1837 InGlue = Hi.getValue(2);
1838 if (!Subtarget->isLittle())
1839 std::swap (Lo, Hi);
1840 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1841 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1842 DAG.getConstant(1, dl, MVT::i32));
1843 }
1844 } else {
1845 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1846 InGlue);
1847 Chain = Val.getValue(1);
1848 InGlue = Val.getValue(2);
1849 }
1850
// Undo any promotion applied by the calling convention; only bitcasts
// occur for call results here.
1851 switch (VA.getLocInfo()) {
1852 default: llvm_unreachable("Unknown loc info!");
1853 case CCValAssign::Full: break;
1854 case CCValAssign::BCvt:
1855 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1856 break;
1857 }
1858
1859 // f16 arguments have their size extended to 4 bytes and passed as if they
1860 // had been copied to the LSBs of a 32-bit register.
1861 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
1862 if (VA.needsCustom() &&
1863 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
1864 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
1865
1866 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
1867 // is less than 32 bits must be sign- or zero-extended after the call for
1868 // security reasons. Although the ABI mandates an extension done by the
1869 // callee, the latter cannot be trusted to follow the rules of the ABI.
1870 const ISD::InputArg &Arg = Ins[VA.getValNo()];
1871 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
1872 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
1873 Val = handleCMSEValue(Val, Arg, DAG, dl);
1874
1875 InVals.push_back(Val);
1876 }
1877
1878 return Chain;
1879}
1880
// Compute the address (and pointer info) of the stack slot that holds an
// outgoing call argument. For tail calls the slot is a fixed object in the
// caller's incoming argument area, offset by SPDiff; otherwise it is
// SP-relative (StackPtr + the argument's assigned offset).
// NOTE(review): the lines constructing the MachinePointerInfo values are
// elided in this extraction.
1881std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
1882 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
1883 bool IsTailCall, int SPDiff) const {
1884 SDValue DstAddr;
1885 MachinePointerInfo DstInfo;
1886 int32_t Offset = VA.getLocMemOffset();
1887 MachineFunction &MF = DAG.getMachineFunction();
1888
1889 if (IsTailCall) {
1890 Offset += SPDiff;
1891 auto PtrVT = getPointerTy(DAG.getDataLayout());
1892 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
1893 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
1894 DstAddr = DAG.getFrameIndex(FI, PtrVT);
1895 DstInfo =
1897 } else {
1898 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
1899 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1900 StackPtr, PtrOff);
1901 DstInfo =
1903 }
1904
1905 return std::make_pair(DstAddr, DstInfo);
1906}
1907
1908// Returns the type of copying which is required to set up a byval argument to
1909// a tail-called function. This isn't needed for non-tail calls, because they
1910// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
1911// avoid clobbering another argument (CopyViaTemp), and sometimes can be
1912// optimised to zero copies when forwarding an argument from the caller's
1913// caller (NoCopy).
// NOTE(review): one condition line (following the "Globals are always safe"
// comment) is elided in this extraction.
1914ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
1915 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
1916 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1917 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
1918
1919 // Globals are always safe to copy from.
1921 return CopyOnce;
1922
1923 // Can only analyse frame index nodes, conservatively assume we need a
1924 // temporary.
1925 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
1926 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
1927 if (!SrcFrameIdxNode || !DstFrameIdxNode)
1928 return CopyViaTemp;
1929
1930 int SrcFI = SrcFrameIdxNode->getIndex();
1931 int DstFI = DstFrameIdxNode->getIndex();
1932 assert(MFI.isFixedObjectIndex(DstFI) &&
1933 "byval passed in non-fixed stack slot");
1934
1935 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
1936 int64_t DstOffset = MFI.getObjectOffset(DstFI);
1937
1938 // If the source is in the local frame, then the copy to the argument memory
1939 // is always valid.
1940 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
1941 if (!FixedSrc ||
1942 (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
1943 return CopyOnce;
1944
1945 // In the case of byval arguments split between registers and the stack,
1946 // computeAddrForCallArg returns a FrameIndex which corresponds only to the
1947 // stack portion, but the Src SDValue will refer to the full value, including
1948 // the local stack memory that the register portion gets stored into. We only
1949 // need to compare them for equality, so normalise on the full value version.
1950 uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
1951 DstOffset -= RegSize;
1952
1953 // If the value is already in the correct location, then no copying is
1954 // needed. If not, then we need to copy via a temporary.
1955 if (SrcOffset == DstOffset)
1956 return NoCopy;
1957 else
1958 return CopyViaTemp;
1959}
1960
// Split an f64 argument into two i32 halves with VMOVRRD and pass them:
// the first half always goes to the register in VA; the second half goes to
// NextVA's register when one was assigned, otherwise it is stored to NextVA's
// stack slot (the store's chain is appended to MemOpChains).
// NOTE(review): one line of the StackPtr CopyFromReg call is elided in this
// extraction.
1961void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1962 SDValue Chain, SDValue &Arg,
1963 RegsToPassVector &RegsToPass,
1964 CCValAssign &VA, CCValAssign &NextVA,
1965 SDValue &StackPtr,
1966 SmallVectorImpl<SDValue> &MemOpChains,
1967 bool IsTailCall,
1968 int SPDiff) const {
1969 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1970 DAG.getVTList(MVT::i32, MVT::i32), Arg);
// On big-endian targets the high half is passed first.
1971 unsigned id = Subtarget->isLittle() ? 0 : 1;
1972 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1973
1974 if (NextVA.isRegLoc())
1975 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1976 else {
1977 assert(NextVA.isMemLoc());
// Materialise SP lazily: it is only needed the first time a half spills.
1978 if (!StackPtr.getNode())
1979 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1981
1982 SDValue DstAddr;
1983 MachinePointerInfo DstInfo;
1984 std::tie(DstAddr, DstInfo) =
1985 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
1986 MemOpChains.push_back(
1987 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
1988 }
1989}
1990
// True when a tail call must be honoured rather than best-effort: fastcc
// under -tailcallopt, or (second operand of the || — elided in this
// extraction) presumably the guaranteed-TCO conventions; confirm against the
// full file.
1991static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
1992 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
1994}
1995
1996/// LowerCall - Lowering a call into a callseq_start <-
1997/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
1998/// nodes.
1999SDValue
2000ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2001 SmallVectorImpl<SDValue> &InVals) const {
2002 SelectionDAG &DAG = CLI.DAG;
2003 SDLoc &dl = CLI.DL;
2004 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2005 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2006 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2007 SDValue Chain = CLI.Chain;
2008 SDValue Callee = CLI.Callee;
2009 bool &isTailCall = CLI.IsTailCall;
2010 CallingConv::ID CallConv = CLI.CallConv;
2011 bool doesNotRet = CLI.DoesNotReturn;
2012 bool isVarArg = CLI.IsVarArg;
2013 const CallBase *CB = CLI.CB;
2014
2015 MachineFunction &MF = DAG.getMachineFunction();
2016 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2017 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2018 MachineFunction::CallSiteInfo CSInfo;
2019 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2020 bool isThisReturn = false;
2021 bool isCmseNSCall = false;
2022 bool isSibCall = false;
2023 bool PreferIndirect = false;
2024 bool GuardWithBTI = false;
2025
2026 // Analyze operands of the call, assigning locations to each operand.
2028 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2029 *DAG.getContext());
2030 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2031
2032 // Lower 'returns_twice' calls to a pseudo-instruction.
2033 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2034 !Subtarget->noBTIAtReturnTwice())
2035 GuardWithBTI = AFI->branchTargetEnforcement();
2036
2037 // Set type id for call site info.
2038 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
2039 CSInfo = MachineFunction::CallSiteInfo(*CB);
2040
2041 // Determine whether this is a non-secure function call.
2042 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2043 isCmseNSCall = true;
2044
2045 // Disable tail calls if they're not supported.
2046 if (!Subtarget->supportsTailCall())
2047 isTailCall = false;
2048
2049 // For both the non-secure calls and the returns from a CMSE entry function,
2050 // the function needs to do some extra work after the call, or before the
2051 // return, respectively, thus it cannot end with a tail call
2052 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2053 isTailCall = false;
2054
2055 if (isa<GlobalAddressSDNode>(Callee)) {
2056 // If we're optimizing for minimum size and the function is called three or
2057 // more times in this block, we can improve codesize by calling indirectly
2058 // as BLXr has a 16-bit encoding.
2059 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2060 if (CLI.CB) {
2061 auto *BB = CLI.CB->getParent();
2062 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2063 count_if(GV->users(), [&BB](const User *U) {
2064 return isa<Instruction>(U) &&
2065 cast<Instruction>(U)->getParent() == BB;
2066 }) > 2;
2067 }
2068 }
2069 if (isTailCall) {
2070 // Check if it's really possible to do a tail call.
2071 isTailCall =
2072 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2073
2074 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2075 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2076 isSibCall = true;
2077
2078 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2079 // detected sibcalls.
2080 if (isTailCall)
2081 ++NumTailCalls;
2082 }
2083
2084 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2085 report_fatal_error("failed to perform tail call elimination on a call "
2086 "site marked musttail");
2087
2088 // Get a count of how many bytes are to be pushed on the stack.
2089 unsigned NumBytes = CCInfo.getStackSize();
2090
2091 // SPDiff is the byte offset of the call's argument area from the callee's.
2092 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2093 // by this amount for a tail call. In a sibling call it must be 0 because the
2094 // caller will deallocate the entire stack and the callee still expects its
2095 // arguments to begin at SP+0. Completely unused for non-tail calls.
2096 int SPDiff = 0;
2097
2098 if (isTailCall && !isSibCall) {
2099 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2100 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2101
2102 // Since callee will pop argument stack as a tail call, we must keep the
2103 // popped size 16-byte aligned.
2104 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
2105 assert(StackAlign && "data layout string is missing stack alignment");
2106 NumBytes = alignTo(NumBytes, *StackAlign);
2107
2108 // SPDiff will be negative if this tail call requires more space than we
2109 // would automatically have in our incoming argument space. Positive if we
2110 // can actually shrink the stack.
2111 SPDiff = NumReusableBytes - NumBytes;
2112
2113 // If this call requires more stack than we have available from
2114 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2115 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2116 AFI->setArgRegsSaveSize(-SPDiff);
2117 }
2118
2119 if (isSibCall) {
2120 // For sibling tail calls, memory operands are available in our caller's stack.
2121 NumBytes = 0;
2122 } else {
2123 // Adjust the stack pointer for the new arguments...
2124 // These operations are automatically eliminated by the prolog/epilog pass
2125 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2126 }
2127
2129 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2130
2131 RegsToPassVector RegsToPass;
2132 SmallVector<SDValue, 8> MemOpChains;
2133
2134 // If we are doing a tail-call, any byval arguments will be written to stack
 2135 // space which was used for incoming arguments. If any of the values being used
2136 // are incoming byval arguments to this function, then they might be
2137 // overwritten by the stores of the outgoing arguments. To avoid this, we
2138 // need to make a temporary copy of them in local stack space, then copy back
2139 // to the argument area.
2140 DenseMap<unsigned, SDValue> ByValTemporaries;
2141 SDValue ByValTempChain;
2142 if (isTailCall) {
2143 SmallVector<SDValue, 8> ByValCopyChains;
2144 for (const CCValAssign &VA : ArgLocs) {
2145 unsigned ArgIdx = VA.getValNo();
2146 SDValue Src = OutVals[ArgIdx];
2147 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2148
2149 if (!Flags.isByVal())
2150 continue;
2151
2152 SDValue Dst;
2153 MachinePointerInfo DstInfo;
2154 std::tie(Dst, DstInfo) =
2155 computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2156 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2157
2158 if (Copy == NoCopy) {
2159 // If the argument is already at the correct offset on the stack
2160 // (because we are forwarding a byval argument from our caller), we
2161 // don't need any copying.
2162 continue;
2163 } else if (Copy == CopyOnce) {
2164 // If the argument is in our local stack frame, no other argument
2165 // preparation can clobber it, so we can copy it to the final location
2166 // later.
2167 ByValTemporaries[ArgIdx] = Src;
2168 } else {
2169 assert(Copy == CopyViaTemp && "unexpected enum value");
2170 // If we might be copying this argument from the outgoing argument
2171 // stack area, we need to copy via a temporary in the local stack
2172 // frame.
2173 int TempFrameIdx = MFI.CreateStackObject(
2174 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2175 SDValue Temp =
2176 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2177
2178 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2179 SDValue AlignNode =
2180 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2181
2182 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2183 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2184 ByValCopyChains.push_back(
2185 DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2186 ByValTemporaries[ArgIdx] = Temp;
2187 }
2188 }
2189 if (!ByValCopyChains.empty())
2190 ByValTempChain =
2191 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2192 }
2193
2194 // During a tail call, stores to the argument area must happen after all of
2195 // the function's incoming arguments have been loaded because they may alias.
2196 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2197 // there's no point in doing so repeatedly so this tracks whether that's
2198 // happened yet.
2199 bool AfterFormalArgLoads = false;
2200
2201 // Walk the register/memloc assignments, inserting copies/loads. In the case
2202 // of tail call optimization, arguments are handled later.
2203 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2204 i != e;
2205 ++i, ++realArgIdx) {
2206 CCValAssign &VA = ArgLocs[i];
2207 SDValue Arg = OutVals[realArgIdx];
2208 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2209 bool isByVal = Flags.isByVal();
2210
2211 // Promote the value if needed.
2212 switch (VA.getLocInfo()) {
2213 default: llvm_unreachable("Unknown loc info!");
2214 case CCValAssign::Full: break;
2215 case CCValAssign::SExt:
2216 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2217 break;
2218 case CCValAssign::ZExt:
2219 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2220 break;
2221 case CCValAssign::AExt:
2222 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2223 break;
2224 case CCValAssign::BCvt:
2225 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2226 break;
2227 }
2228
2229 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2230 Chain = DAG.getStackArgumentTokenFactor(Chain);
2231 if (ByValTempChain) {
2232 // In case of large byval copies, re-using the stackframe for tail-calls
2233 // can lead to overwriting incoming arguments on the stack. Force
2234 // loading these stack arguments before the copy to avoid that.
2235 SmallVector<SDValue, 8> IncomingLoad;
2236 for (unsigned I = 0; I < OutVals.size(); ++I) {
2237 if (Outs[I].Flags.isByVal())
2238 continue;
2239
2240 SDValue OutVal = OutVals[I];
2241 LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);
2242 if (!OutLN)
2243 continue;
2244
2245 FrameIndexSDNode *FIN =
2247 if (!FIN)
2248 continue;
2249
2250 if (!MFI.isFixedObjectIndex(FIN->getIndex()))
2251 continue;
2252
2253 for (const CCValAssign &VA : ArgLocs) {
2254 if (VA.isMemLoc())
2255 IncomingLoad.push_back(OutVal.getValue(1));
2256 }
2257 }
2258
2259 // Update the chain to force loads for potentially clobbered argument
2260 // loads to happen before the byval copy.
2261 if (!IncomingLoad.empty()) {
2262 IncomingLoad.push_back(Chain);
2263 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);
2264 }
2265
2266 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
2267 ByValTempChain);
2268 }
2269 AfterFormalArgLoads = true;
2270 }
2271
2272 // f16 arguments have their size extended to 4 bytes and passed as if they
2273 // had been copied to the LSBs of a 32-bit register.
2274 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2275 if (VA.needsCustom() &&
2276 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2277 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2278 } else {
2279 // f16 arguments could have been extended prior to argument lowering.
 2280 // Mask these arguments if this is a CMSE nonsecure call.
2281 auto ArgVT = Outs[realArgIdx].ArgVT;
2282 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2283 auto LocBits = VA.getLocVT().getSizeInBits();
2284 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2285 SDValue Mask =
2286 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2287 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2288 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2289 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2290 }
2291 }
2292
2293 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2294 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2295 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2296 DAG.getConstant(0, dl, MVT::i32));
2297 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2298 DAG.getConstant(1, dl, MVT::i32));
2299
2300 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2301 StackPtr, MemOpChains, isTailCall, SPDiff);
2302
2303 VA = ArgLocs[++i]; // skip ahead to next loc
2304 if (VA.isRegLoc()) {
2305 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2306 StackPtr, MemOpChains, isTailCall, SPDiff);
2307 } else {
2308 assert(VA.isMemLoc());
2309 SDValue DstAddr;
2310 MachinePointerInfo DstInfo;
2311 std::tie(DstAddr, DstInfo) =
2312 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2313 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2314 }
2315 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2316 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2317 StackPtr, MemOpChains, isTailCall, SPDiff);
2318 } else if (VA.isRegLoc()) {
2319 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2320 Outs[0].VT == MVT::i32) {
2321 assert(VA.getLocVT() == MVT::i32 &&
2322 "unexpected calling convention register assignment");
2323 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2324 "unexpected use of 'returned'");
2325 isThisReturn = true;
2326 }
2327 const TargetOptions &Options = DAG.getTarget().Options;
2328 if (Options.EmitCallSiteInfo)
2329 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2330 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2331 } else if (isByVal) {
2332 assert(VA.isMemLoc());
2333 unsigned offset = 0;
2334
2335 // True if this byval aggregate will be split between registers
2336 // and memory.
2337 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2338 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2339
2340 SDValue ByValSrc;
2341 bool NeedsStackCopy;
2342 if (auto It = ByValTemporaries.find(realArgIdx);
2343 It != ByValTemporaries.end()) {
2344 ByValSrc = It->second;
2345 NeedsStackCopy = true;
2346 } else {
2347 ByValSrc = Arg;
2348 NeedsStackCopy = !isTailCall;
2349 }
2350
2351 // If part of the argument is in registers, load them.
2352 if (CurByValIdx < ByValArgsCount) {
2353 unsigned RegBegin, RegEnd;
2354 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2355
2356 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2357 unsigned int i, j;
2358 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2359 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2360 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
2361 SDValue Load =
2362 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2363 DAG.InferPtrAlign(AddArg));
2364 MemOpChains.push_back(Load.getValue(1));
2365 RegsToPass.push_back(std::make_pair(j, Load));
2366 }
2367
 2368 // If the parameter size extends outside the register area, the "offset"
 2369 // value helps us to calculate the stack slot for the remaining part properly.
2370 offset = RegEnd - RegBegin;
2371
2372 CCInfo.nextInRegsParam();
2373 }
2374
2375 // If the memory part of the argument isn't already in the correct place
2376 // (which can happen with tail calls), copy it into the argument area.
2377 if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2378 auto PtrVT = getPointerTy(DAG.getDataLayout());
2379 SDValue Dst;
2380 MachinePointerInfo DstInfo;
2381 std::tie(Dst, DstInfo) =
2382 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2383 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2384 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
2385 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2386 MVT::i32);
2387 SDValue AlignNode =
2388 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2389
2390 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2391 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2392 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2393 Ops));
2394 }
2395 } else {
2396 assert(VA.isMemLoc());
2397 SDValue DstAddr;
2398 MachinePointerInfo DstInfo;
2399 std::tie(DstAddr, DstInfo) =
2400 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2401
2402 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2403 MemOpChains.push_back(Store);
2404 }
2405 }
2406
2407 if (!MemOpChains.empty())
2408 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2409
2410 // Build a sequence of copy-to-reg nodes chained together with token chain
2411 // and flag operands which copy the outgoing args into the appropriate regs.
2412 SDValue InGlue;
2413 for (const auto &[Reg, N] : RegsToPass) {
2414 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2415 InGlue = Chain.getValue(1);
2416 }
2417
2418 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2419 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2420 // node so that legalize doesn't hack it.
2421 bool isDirect = false;
2422
2423 const TargetMachine &TM = getTargetMachine();
2424 const GlobalValue *GVal = nullptr;
2425 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2426 GVal = G->getGlobal();
2427 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2428
2429 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2430 bool isLocalARMFunc = false;
2431 auto PtrVt = getPointerTy(DAG.getDataLayout());
2432
2433 if (Subtarget->genLongCalls()) {
2434 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2435 "long-calls codegen is not position independent!");
2436 // Handle a global address or an external symbol. If it's not one of
2437 // those, the target's already in a register, so we don't need to do
2438 // anything extra.
2439 if (isa<GlobalAddressSDNode>(Callee)) {
2440 if (Subtarget->genExecuteOnly()) {
2441 if (Subtarget->useMovt())
2442 ++NumMovwMovt;
2443 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2444 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2445 } else {
2446 // Create a constant pool entry for the callee address
2447 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2448 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2449 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2450
2451 // Get the address of the callee into a register
2452 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2453 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2454 Callee = DAG.getLoad(
2455 PtrVt, dl, DAG.getEntryNode(), Addr,
2457 }
2458 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2459 const char *Sym = S->getSymbol();
2460
2461 if (Subtarget->genExecuteOnly()) {
2462 if (Subtarget->useMovt())
2463 ++NumMovwMovt;
2464 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2465 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2466 } else {
2467 // Create a constant pool entry for the callee address
2468 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2469 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2470 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2471
2472 // Get the address of the callee into a register
2473 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2474 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2475 Callee = DAG.getLoad(
2476 PtrVt, dl, DAG.getEntryNode(), Addr,
2478 }
2479 }
2480 } else if (isa<GlobalAddressSDNode>(Callee)) {
2481 if (!PreferIndirect) {
2482 isDirect = true;
2483 bool isDef = GVal->isStrongDefinitionForLinker();
2484
2485 // ARM call to a local ARM function is predicable.
2486 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2487 // tBX takes a register source operand.
2488 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2489 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2490 Callee = DAG.getNode(
2491 ARMISD::WrapperPIC, dl, PtrVt,
2492 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2493 Callee = DAG.getLoad(
2494 PtrVt, dl, DAG.getEntryNode(), Callee,
2498 } else if (Subtarget->isTargetCOFF()) {
2499 assert(Subtarget->isTargetWindows() &&
2500 "Windows is the only supported COFF target");
2501 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2502 if (GVal->hasDLLImportStorageClass())
2503 TargetFlags = ARMII::MO_DLLIMPORT;
2504 else if (!TM.shouldAssumeDSOLocal(GVal))
2505 TargetFlags = ARMII::MO_COFFSTUB;
2506 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2507 TargetFlags);
2508 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2509 Callee =
2510 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2511 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2513 } else {
2514 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2515 }
2516 }
2517 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2518 isDirect = true;
2519 // tBX takes a register source operand.
2520 const char *Sym = S->getSymbol();
2521 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2522 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2523 ARMConstantPoolValue *CPV =
2525 ARMPCLabelIndex, 4);
2526 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2527 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2528 Callee = DAG.getLoad(
2529 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2531 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2532 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2533 } else {
2534 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2535 }
2536 }
2537
2538 if (isCmseNSCall) {
2539 assert(!isARMFunc && !isDirect &&
2540 "Cannot handle call to ARM function or direct call");
2541 if (NumBytes > 0) {
2542 DAG.getContext()->diagnose(
2543 DiagnosticInfoUnsupported(DAG.getMachineFunction().getFunction(),
2544 "call to non-secure function would require "
2545 "passing arguments on stack",
2546 dl.getDebugLoc()));
2547 }
2548 if (isStructRet) {
2549 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
2551 "call to non-secure function would return value through pointer",
2552 dl.getDebugLoc()));
2553 }
2554 }
2555
2556 // FIXME: handle tail calls differently.
2557 unsigned CallOpc;
2558 if (Subtarget->isThumb()) {
2559 if (GuardWithBTI)
2560 CallOpc = ARMISD::t2CALL_BTI;
2561 else if (isCmseNSCall)
2562 CallOpc = ARMISD::tSECALL;
2563 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2564 CallOpc = ARMISD::CALL_NOLINK;
2565 else
2566 CallOpc = ARMISD::CALL;
2567 } else {
2568 if (!isDirect && !Subtarget->hasV5TOps())
2569 CallOpc = ARMISD::CALL_NOLINK;
2570 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2571 // Emit regular call when code size is the priority
2572 !Subtarget->hasMinSize())
2573 // "mov lr, pc; b _foo" to avoid confusing the RSP
2574 CallOpc = ARMISD::CALL_NOLINK;
2575 else
2576 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2577 }
2578
2579 // We don't usually want to end the call-sequence here because we would tidy
2580 // the frame up *after* the call, however in the ABI-changing tail-call case
2581 // we've carefully laid out the parameters so that when sp is reset they'll be
2582 // in the correct location.
2583 if (isTailCall && !isSibCall) {
2584 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2585 InGlue = Chain.getValue(1);
2586 }
2587
2588 std::vector<SDValue> Ops;
2589 Ops.push_back(Chain);
2590 Ops.push_back(Callee);
2591
2592 if (isTailCall) {
2593 Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
2594 }
2595
2596 // Add argument registers to the end of the list so that they are known live
2597 // into the call.
2598 for (const auto &[Reg, N] : RegsToPass)
2599 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2600
2601 // Add a register mask operand representing the call-preserved registers.
2602 const uint32_t *Mask;
2603 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2604 if (isThisReturn) {
2605 // For 'this' returns, use the R0-preserving mask if applicable
2606 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2607 if (!Mask) {
2608 // Set isThisReturn to false if the calling convention is not one that
2609 // allows 'returned' to be modeled in this way, so LowerCallResult does
2610 // not try to pass 'this' straight through
2611 isThisReturn = false;
2612 Mask = ARI->getCallPreservedMask(MF, CallConv);
2613 }
2614 } else
2615 Mask = ARI->getCallPreservedMask(MF, CallConv);
2616
2617 assert(Mask && "Missing call preserved mask for calling convention");
2618 Ops.push_back(DAG.getRegisterMask(Mask));
2619
2620 if (InGlue.getNode())
2621 Ops.push_back(InGlue);
2622
2623 if (isTailCall) {
2625 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
2626 if (CLI.CFIType)
2627 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2628 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2629 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2630 return Ret;
2631 }
2632
2633 // Returns a chain and a flag for retval copy to use.
2634 Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
2635 if (CLI.CFIType)
2636 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2637 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2638 InGlue = Chain.getValue(1);
2639 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2640
2641 // If we're guaranteeing tail-calls will be honoured, the callee must
2642 // pop its own argument stack on return. But this call is *not* a tail call so
2643 // we need to undo that after it returns to restore the status-quo.
2644 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2645 uint64_t CalleePopBytes =
2646 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
2647
2648 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2649 if (!Ins.empty())
2650 InGlue = Chain.getValue(1);
2651
2652 // Handle result values, copying them out of physregs into vregs that we
2653 // return.
2654 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2655 InVals, isThisReturn,
2656 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
2657}
2658
2659/// HandleByVal - Every parameter *after* a byval parameter is passed
2660/// on the stack. Remember the next parameter register to allocate,
2661/// and then confiscate the rest of the parameter registers to insure
2662/// this.
2663void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2664 Align Alignment) const {
2665 // Byval (as with any stack) slots are always at least 4 byte aligned.
2666 Alignment = std::max(Alignment, Align(4));
2667
2668 MCRegister Reg = State->AllocateReg(GPRArgRegs);
2669 if (!Reg)
2670 return;
2671
2672 unsigned AlignInRegs = Alignment.value() / 4;
2673 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2674 for (unsigned i = 0; i < Waste; ++i)
2675 Reg = State->AllocateReg(GPRArgRegs);
2676
2677 if (!Reg)
2678 return;
2679
2680 unsigned Excess = 4 * (ARM::R4 - Reg);
2681
2682 // Special case when NSAA != SP and parameter size greater than size of
2683 // all remained GPR regs. In that case we can't split parameter, we must
2684 // send it to stack. We also must set NCRN to R4, so waste all
2685 // remained registers.
2686 const unsigned NSAAOffset = State->getStackSize();
2687 if (NSAAOffset != 0 && Size > Excess) {
2688 while (State->AllocateReg(GPRArgRegs))
2689 ;
2690 return;
2691 }
2692
2693 // First register for byval parameter is the first register that wasn't
2694 // allocated before this method call, so it would be "reg".
2695 // If parameter is small enough to be saved in range [reg, r4), then
2696 // the end (first after last) register would be reg + param-size-in-regs,
2697 // else parameter would be splitted between registers and stack,
2698 // end register would be r4 in this case.
2699 unsigned ByValRegBegin = Reg;
2700 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2701 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2702 // Note, first register is allocated in the beginning of function already,
2703 // allocate remained amount of registers we need.
2704 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2705 State->AllocateReg(GPRArgRegs);
2706 // A byval parameter that is split between registers and memory needs its
2707 // size truncated here.
2708 // In the case where the entire structure fits in registers, we set the
2709 // size in memory to zero.
2710 Size = std::max<int>(Size - Excess, 0);
2711}
2712
2713/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2714/// for tail call optimization. Targets which want to do tail call
2715/// optimization should implement this function. Note that this function also
2716/// processes musttail calls, so when this function returns false on a valid
2717/// musttail call, a fatal backend error occurs.
2718bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2720 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
2721 CallingConv::ID CalleeCC = CLI.CallConv;
2722 SDValue Callee = CLI.Callee;
2723 bool isVarArg = CLI.IsVarArg;
2724 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2725 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2726 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2727 const SelectionDAG &DAG = CLI.DAG;
2728 MachineFunction &MF = DAG.getMachineFunction();
2729 const Function &CallerF = MF.getFunction();
2730 CallingConv::ID CallerCC = CallerF.getCallingConv();
2731
2732 assert(Subtarget->supportsTailCall());
2733
2734 // Indirect tail-calls require a register to hold the target address. That
2735 // register must be:
2736 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
2737 // * Not callee-saved, so must be one of r0-r3 or r12.
2738 // * Not used to hold an argument to the tail-called function, which might be
2739 // in r0-r3.
2740 // * Not used to hold the return address authentication code, which is in r12
2741 // if enabled.
2742 // Sometimes, no register matches all of these conditions, so we can't do a
2743 // tail-call.
2744 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
2745 SmallSet<MCPhysReg, 5> AddressRegisters = {ARM::R0, ARM::R1, ARM::R2,
2746 ARM::R3};
2747 if (!(Subtarget->isThumb1Only() ||
2748 MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
2749 AddressRegisters.insert(ARM::R12);
2750 for (const CCValAssign &AL : ArgLocs)
2751 if (AL.isRegLoc())
2752 AddressRegisters.erase(AL.getLocReg());
2753 if (AddressRegisters.empty()) {
2754 LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
2755 return false;
2756 }
2757 }
2758
2759 // Look for obvious safe cases to perform tail call optimization that do not
2760 // require ABI changes. This is what gcc calls sibcall.
2761
2762 // Exception-handling functions need a special set of instructions to indicate
2763 // a return to the hardware. Tail-calling another function would probably
2764 // break this.
2765 if (CallerF.hasFnAttribute("interrupt")) {
2766 LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
2767 return false;
2768 }
2769
2770 if (canGuaranteeTCO(CalleeCC,
2771 getTargetMachine().Options.GuaranteedTailCallOpt)) {
2772 LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
2773 << " (guaranteed tail-call CC)\n");
2774 return CalleeCC == CallerCC;
2775 }
2776
2777 // Also avoid sibcall optimization if either caller or callee uses struct
2778 // return semantics.
2779 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2780 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
2781 if (isCalleeStructRet != isCallerStructRet) {
2782 LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
2783 return false;
2784 }
2785
2786 // Externally-defined functions with weak linkage should not be
2787 // tail-called on ARM when the OS does not support dynamic
2788 // pre-emption of symbols, as the AAELF spec requires normal calls
2789 // to undefined weak functions to be replaced with a NOP or jump to the
2790 // next instruction. The behaviour of branch instructions in this
2791 // situation (as used for tail calls) is implementation-defined, so we
2792 // cannot rely on the linker replacing the tail call with a return.
2793 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2794 const GlobalValue *GV = G->getGlobal();
2795 const Triple &TT = getTargetMachine().getTargetTriple();
2796 if (GV->hasExternalWeakLinkage() &&
2797 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
2798 TT.isOSBinFormatMachO())) {
2799 LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
2800 return false;
2801 }
2802 }
2803
2804 // Check that the call results are passed in the same way.
2805 LLVMContext &C = *DAG.getContext();
2807 getEffectiveCallingConv(CalleeCC, isVarArg),
2808 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
2809 CCAssignFnForReturn(CalleeCC, isVarArg),
2810 CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
2811 LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
2812 return false;
2813 }
2814 // The callee has to preserve all registers the caller needs to preserve.
2815 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2816 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2817 if (CalleeCC != CallerCC) {
2818 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2819 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
2820 LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
2821 return false;
2822 }
2823 }
2824
2825 // If Caller's vararg argument has been split between registers and stack, do
2826 // not perform tail call, since part of the argument is in caller's local
2827 // frame.
2828 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2829 if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
2830 LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
2831 return false;
2832 }
2833
2834 // If the callee takes no arguments then go on to check the results of the
2835 // call.
2836 const MachineRegisterInfo &MRI = MF.getRegInfo();
2837 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
2838 LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
2839 return false;
2840 }
2841
2842 // If the stack arguments for this call do not fit into our own save area then
2843 // the call cannot be made tail.
2844 if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
2845 return false;
2846
2847 LLVM_DEBUG(dbgs() << "true\n");
2848 return true;
2849}
2850
2851bool
2852ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2853 MachineFunction &MF, bool isVarArg,
2855 LLVMContext &Context, const Type *RetTy) const {
2857 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2858 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2859}
2860
// Build an ARMISD::INTRET_GLUE return node for a function carrying the
// "interrupt" attribute, inserting the LR adjustment needed on exception
// return as an extra i32 operand right after the chain.
// NOTE(review): the first line of this signature (taking the RetOps operand
// list, original line 2861) is not visible in this extract.
2862 const SDLoc &DL, SelectionDAG &DAG) {
2863 const MachineFunction &MF = DAG.getMachineFunction();
2864 const Function &F = MF.getFunction();
2865
// The attribute value selects the interrupt kind (empty string means a
// generic handler and is treated like IRQ/FIQ/ABORT below).
2866 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2867
2868 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2869 // version of the "preferred return address". These offsets affect the return
2870 // instruction if this is a return from PL1 without hypervisor extensions.
2871 // IRQ/FIQ: +4 "subs pc, lr, #4"
2872 // SWI: 0 "subs pc, lr, #0"
2873 // ABORT: +4 "subs pc, lr, #4"
2874 // UNDEF: +4/+2 "subs pc, lr, #0"
2875 // UNDEF varies depending on where the exception came from ARM or Thumb
2876 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2877
2878 int64_t LROffset;
2879 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2880 IntKind == "ABORT")
2881 LROffset = 4;
2882 else if (IntKind == "SWI" || IntKind == "UNDEF")
2883 LROffset = 0;
2884 else
2885 report_fatal_error("Unsupported interrupt attribute. If present, value "
2886 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2887
// Operand #1 of INTRET_GLUE carries the LR offset used by the eventual
// "subs pc, lr, #N" return sequence; the chain stays at operand #0.
2888 RetOps.insert(RetOps.begin() + 1,
2889 DAG.getConstant(LROffset, DL, MVT::i32, false));
2890
2891 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
2892}
2893
// Lower an IR 'ret' into an ARMISD::RET_GLUE / SERET_GLUE / INTRET_GLUE node,
// copying each return value into the physical register(s) assigned by the
// return calling convention. Handles f16 bitcast peepholes, CMSE non-secure
// entry masking, and splitting f64/v2f64 values across GPR pairs via VMOVRRD.
// NOTE(review): several original lines are elided in this extract (the Outs
// parameter, the RVLocs/RetOps declarations, one DiagnosticInfoUnsupported
// argument, and the DPR push in the CSRsViaCopy loop).
2894SDValue
2895ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2896 bool isVarArg,
2898 const SmallVectorImpl<SDValue> &OutVals,
2899 const SDLoc &dl, SelectionDAG &DAG) const {
2900 // CCValAssign - represent the assignment of the return value to a location.
2902
2903 // CCState - Info about the registers and stack slots.
2904 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2905 *DAG.getContext());
2906
2907 // Analyze outgoing return values.
2908 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2909
2910 SDValue Glue;
2912 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2913 bool isLittleEndian = Subtarget->isLittle();
2914
2915 MachineFunction &MF = DAG.getMachineFunction();
2916 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2917 AFI->setReturnRegsCount(RVLocs.size());
2918
2919 // Report error if cmse entry function returns structure through first ptr arg.
2920 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
2921 // Note: using an empty SDLoc(), as the first line of the function is a
2922 // better place to report than the last line.
2923 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
2925 "secure entry function would return value through pointer",
2926 SDLoc().getDebugLoc()));
2927 }
2928
2929 // Copy the result values into the output registers.
2930 for (unsigned i = 0, realRVLocIdx = 0;
2931 i != RVLocs.size();
2932 ++i, ++realRVLocIdx) {
2933 CCValAssign &VA = RVLocs[i];
2934 assert(VA.isRegLoc() && "Can only return in registers!");
2935
2936 SDValue Arg = OutVals[realRVLocIdx];
2937 bool ReturnF16 = false;
2938
2939 if (Subtarget->hasFullFP16() && getTM().isTargetHardFloat()) {
2940 // Half-precision return values can be returned like this:
2941 //
2942 // t11 f16 = fadd ...
2943 // t12: i16 = bitcast t11
2944 // t13: i32 = zero_extend t12
2945 // t14: f32 = bitcast t13 <~~~~~~~ Arg
2946 //
2947 // to avoid code generation for bitcasts, we simply set Arg to the node
2948 // that produces the f16 value, t11 in this case.
2949 //
2950 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2951 SDValue ZE = Arg.getOperand(0);
2952 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2953 SDValue BC = ZE.getOperand(0);
2954 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2955 Arg = BC.getOperand(0);
2956 ReturnF16 = true;
2957 }
2958 }
2959 }
2960 }
2961
// Apply any location-info conversion the CC assigned; BCvt is skipped when
// we already peeled the bitcast chain off an f16 value above.
2962 switch (VA.getLocInfo()) {
2963 default: llvm_unreachable("Unknown loc info!");
2964 case CCValAssign::Full: break;
2965 case CCValAssign::BCvt:
2966 if (!ReturnF16)
2967 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2968 break;
2969 }
2970
2971 // Mask f16 arguments if this is a CMSE nonsecure entry.
2972 auto RetVT = Outs[realRVLocIdx].ArgVT;
2973 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
2974 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
2975 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2976 } else {
// Clear the high bits of the wider location register so no secure-state
// data leaks alongside the 16-bit return value.
2977 auto LocBits = VA.getLocVT().getSizeInBits();
2978 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
2979 SDValue Mask =
2980 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2981 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2982 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2983 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2984 }
2985 }
2986
2987 if (VA.needsCustom() &&
2988 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
2989 if (VA.getLocVT() == MVT::v2f64) {
2990 // Extract the first half and return it in two registers.
2991 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2992 DAG.getConstant(0, dl, MVT::i32));
2993 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2994 DAG.getVTList(MVT::i32, MVT::i32), Half);
2995
2996 Chain =
2997 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2998 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
2999 Glue = Chain.getValue(1);
3000 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3001 VA = RVLocs[++i]; // skip ahead to next loc
3002 Chain =
3003 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3004 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3005 Glue = Chain.getValue(1);
3006 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3007 VA = RVLocs[++i]; // skip ahead to next loc
3008
3009 // Extract the 2nd half and fall through to handle it as an f64 value.
3010 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3011 DAG.getConstant(1, dl, MVT::i32));
3012 }
3013 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3014 // available.
3015 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3016 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3017 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3018 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3019 Glue = Chain.getValue(1);
3020 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3021 VA = RVLocs[++i]; // skip ahead to next loc
3022 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3023 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3024 } else
3025 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3026
3027 // Guarantee that all emitted copies are
3028 // stuck together, avoiding something bad.
3029 Glue = Chain.getValue(1);
3030 RetOps.push_back(DAG.getRegister(
3031 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3032 }
// Functions whose callee-saved registers are preserved via explicit copies
// (e.g. for swifterror-style schemes) must list those registers as implicit
// return operands so they are kept live to the return.
3033 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3034 const MCPhysReg *I =
3035 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3036 if (I) {
3037 for (; *I; ++I) {
3038 if (ARM::GPRRegClass.contains(*I))
3039 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3040 else if (ARM::DPRRegClass.contains(*I))
3042 else
3043 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3044 }
3045 }
3046
3047 // Update chain and glue.
3048 RetOps[0] = Chain;
3049 if (Glue.getNode())
3050 RetOps.push_back(Glue);
3051
3052 // CPUs which aren't M-class use a special sequence to return from
3053 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3054 // though we use "subs pc, lr, #N").
3055 //
3056 // M-class CPUs actually use a normal return sequence with a special
3057 // (hardware-provided) value in LR, so the normal code path works.
3058 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3059 !Subtarget->isMClass()) {
3060 if (Subtarget->isThumb1Only())
3061 report_fatal_error("interrupt attribute is not supported in Thumb1");
3062 return LowerInterruptReturn(RetOps, dl, DAG);
3063 }
3064
// CMSE non-secure entry functions use a dedicated secure-return node.
3065 unsigned RetNode =
3066 AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE;
3067 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3068}
3069
// Returns true if node N's single result feeds only the return sequence
// (directly, or through the CopyToReg / VMOVRRD / BITCAST shapes produced by
// LowerReturn). On success, Chain is updated to the chain value suitable for
// a tail call. Any copy carrying a glue operand makes us bail conservatively.
3070bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3071 if (N->getNumValues() != 1)
3072 return false;
3073 if (!N->hasNUsesOfValue(1, 0))
3074 return false;
3075
3076 SDValue TCChain = Chain;
3077 SDNode *Copy = *N->user_begin();
3078 if (Copy->getOpcode() == ISD::CopyToReg) {
3079 // If the copy has a glue operand, we conservatively assume it isn't safe to
3080 // perform a tail call.
3081 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3082 return false;
3083 TCChain = Copy->getOperand(0);
3084 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3085 SDNode *VMov = Copy;
3086 // f64 returned in a pair of GPRs.
3087 SmallPtrSet<SDNode*, 2> Copies;
// Every user of the VMOVRRD must itself be a CopyToReg; collect them so we
// can identify which copy heads the chain.
3088 for (SDNode *U : VMov->users()) {
3089 if (U->getOpcode() != ISD::CopyToReg)
3090 return false;
3091 Copies.insert(U);
3092 }
3093 if (Copies.size() > 2)
3094 return false;
3095
// The two copies are chained: the second one's chain operand is the first
// copy, while the first one's chain operand is the real incoming chain.
3096 for (SDNode *U : VMov->users()) {
3097 SDValue UseChain = U->getOperand(0);
3098 if (Copies.count(UseChain.getNode()))
3099 // Second CopyToReg
3100 Copy = U;
3101 else {
3102 // We are at the top of this chain.
3103 // If the copy has a glue operand, we conservatively assume it
3104 // isn't safe to perform a tail call.
3105 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3106 return false;
3107 // First CopyToReg
3108 TCChain = UseChain;
3109 }
3110 }
3111 } else if (Copy->getOpcode() == ISD::BITCAST) {
3112 // f32 returned in a single GPR.
3113 if (!Copy->hasOneUse())
3114 return false;
3115 Copy = *Copy->user_begin();
3116 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3117 return false;
3118 // If the copy has a glue operand, we conservatively assume it isn't safe to
3119 // perform a tail call.
3120 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3121 return false;
3122 TCChain = Copy->getOperand(0);
3123 } else {
3124 return false;
3125 }
3126
// The final copy must be consumed only by return nodes (regular or
// interrupt return), and by at least one of them.
3127 bool HasRet = false;
3128 for (const SDNode *U : Copy->users()) {
3129 if (U->getOpcode() != ARMISD::RET_GLUE &&
3130 U->getOpcode() != ARMISD::INTRET_GLUE)
3131 return false;
3132 HasRet = true;
3133 }
3134
3135 if (!HasRet)
3136 return false;
3137
3138 Chain = TCChain;
3139 return true;
3140}
3141
3142bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3143 if (!Subtarget->supportsTailCall())
3144 return false;
3145
3146 if (!CI->isTailCall())
3147 return false;
3148
3149 return true;
3150}
3151
3152// Trying to write a 64 bit value so need to split into two 32 bit values first,
3153// and pass the lower and high parts through.
// NOTE(review): the signature line of this static helper (original line 3154)
// is not visible in this extract; Op is the i64 WRITE_REGISTER node.
3155 SDLoc DL(Op);
3156 SDValue WriteValue = Op->getOperand(2);
3157
3158 // This function is only supposed to be called for i64 type argument.
3159 assert(WriteValue.getValueType() == MVT::i64
3160 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3161
// Re-emit the WRITE_REGISTER with the value split into (Lo, Hi) i32 halves;
// operands 0/1 (chain and register id) are forwarded unchanged.
3162 SDValue Lo, Hi;
3163 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3164 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3165 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3166}
3167
3168// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3169// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3170// one of the above mentioned nodes. It has to be wrapped because otherwise
3171// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3172// be used to form addressing mode. These wrapped nodes will be selected
3173// into MOVi.
3174SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3175 SelectionDAG &DAG) const {
3176 EVT PtrVT = Op.getValueType();
3177 // FIXME there is no actual debug info here
3178 SDLoc dl(Op);
3179 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3180 SDValue Res;
3181
3182 // When generating execute-only code Constant Pools must be promoted to the
3183 // global data section. It's a bit ugly that we can't share them across basic
3184 // blocks, but this way we guarantee that execute-only behaves correct with
3185 // position-independent addressing modes.
3186 if (Subtarget->genExecuteOnly()) {
// Materialize the constant as a fresh internal global variable named
// "<private-prefix>CP<function#>_<uid>" and lower it as a global address.
3187 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3188 auto *T = CP->getType();
3189 auto C = const_cast<Constant*>(CP->getConstVal());
3190 auto M = DAG.getMachineFunction().getFunction().getParent();
3191 auto GV = new GlobalVariable(
3192 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3193 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3194 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3195 Twine(AFI->createPICLabelUId())
3196 );
// NOTE(review): the line creating GA from the new global (original line
// 3197) is not visible in this extract.
3198 dl, PtrVT);
3199 return LowerGlobalAddress(GA, DAG);
3200 }
3201
3202 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3203 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3204 Align CPAlign = CP->getAlign();
3205 if (Subtarget->isThumb1Only())
3206 CPAlign = std::max(CPAlign, Align(4));
3207 if (CP->isMachineConstantPoolEntry())
3208 Res =
3209 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3210 else
3211 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3212 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3213}
3214
// NOTE(review): this is a fragment — the enclosing function's signature
// (original line 3215) and both return statements (lines 3220-3221) are not
// visible in this extract. It selects how jump-table entries are emitted.
3216 // If we don't have a 32-bit pc-relative branch instruction then the jump
3217 // table consists of block addresses. Usually this is inline, but for
3218 // execute-only it must be placed out-of-line.
3219 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3222}
3223
// Lower an IR blockaddress by placing it in the constant pool and loading it;
// position-independent code additionally wraps the load in a PIC_ADD with a
// per-function PIC label.
// NOTE(review): the MF/AFI local declarations (original lines 3226-3227), the
// CPV declaration head (3239), and the MachinePointerInfo argument of the
// load (3247) are not visible in this extract.
3224SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3225 SelectionDAG &DAG) const {
3228 unsigned ARMPCLabelIndex = 0;
3229 SDLoc DL(Op);
3230 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3231 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3232 SDValue CPAddr;
3233 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3234 if (!IsPositionIndependent) {
3235 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3236 } else {
// PC-relative entries must compensate for the pipeline offset of the PC
// read: 4 bytes in Thumb mode, 8 in ARM mode.
3237 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3238 ARMPCLabelIndex = AFI->createPICLabelUId();
3240 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3241 ARMCP::CPBlockAddress, PCAdj);
3242 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3243 }
3244 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3245 SDValue Result = DAG.getLoad(
3246 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3248 if (!IsPositionIndependent)
3249 return Result;
3250 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3251 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3252}
3253
3254/// Convert a TLS address reference into the correct sequence of loads
3255/// and calls to compute the variable's address for Darwin, and return an
3256/// SDValue containing the final node.
3257
3258/// Darwin only has one TLS scheme which must be capable of dealing with the
3259/// fully general situation, in the worst case. This means:
3260/// + "extern __thread" declaration.
3261/// + Defined in a possibly unknown dynamic library.
3262///
3263/// The general system is that each __thread variable has a [3 x i32] descriptor
3264/// which contains information used by the runtime to calculate the address. The
3265/// only part of this the compiler needs to know about is the first word, which
3266/// contains a function pointer that must be called with the address of the
3267/// entire descriptor in "r0".
3268///
3269/// Since this descriptor may be in a different unit, in general access must
3270/// proceed along the usual ARM rules. A common sequence to produce is:
3271///
3272/// movw rT1, :lower16:_var$non_lazy_ptr
3273/// movt rT1, :upper16:_var$non_lazy_ptr
3274/// ldr r0, [rT1]
3275/// ldr rT2, [r0]
3276/// blx rT2
3277/// [...address now in r0...]
3278SDValue
3279ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3280 SelectionDAG &DAG) const {
3281 assert(Subtarget->isTargetDarwin() &&
3282 "This function expects a Darwin target");
3283 SDLoc DL(Op);
3284
3285 // First step is to get the address of the actual global symbol. This is where
3286 // the TLS descriptor lives.
3287 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3288
3289 // The first entry in the descriptor is a function pointer that we must call
3290 // to obtain the address of the variable.
// NOTE(review): the MachinePointerInfo/flags arguments of this load
// (original lines 3294-3296) are not visible in this extract.
3291 SDValue Chain = DAG.getEntryNode();
3292 SDValue FuncTLVGet = DAG.getLoad(
3293 MVT::i32, DL, Chain, DescAddr,
3297 Chain = FuncTLVGet.getValue(1);
3298
// The runtime helper is a call, so the frame must be marked as adjusting
// the stack even though no visible call frame setup is emitted here.
3299 MachineFunction &F = DAG.getMachineFunction();
3300 MachineFrameInfo &MFI = F.getFrameInfo();
3301 MFI.setAdjustsStack(true);
3302
3303 // TLS calls preserve all registers except those that absolutely must be
3304 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3305 // silly).
3306 auto TRI =
3308 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3309 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3310
3311 // Finally, we can make the call. This is just a degenerate version of a
3312 // normal ARM call node: r0 takes the address of the descriptor, and
3313 // returns the address of the variable in this thread.
3314 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3315 Chain =
3316 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3317 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3318 DAG.getRegisterMask(Mask), Chain.getValue(1));
3319 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3320}
3321
// Lower a TLS address for Windows on ARM: read the TEB via a CP15 register
// read, walk TEB -> TLS array -> per-module TLS block using _tls_index, then
// add the variable's SECREL offset (loaded from the constant pool).
3322SDValue
3323ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3324 SelectionDAG &DAG) const {
3325 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3326
3327 SDValue Chain = DAG.getEntryNode();
3328 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3329 SDLoc DL(Op);
3330
3331 // Load the current TEB (thread environment block)
// The operands encode "mrc p15, #0, <Rt>, c13, c0, #2" — presumably the
// CP15 user-mode thread-ID register holding the TEB pointer (verify against
// the ARM system register documentation).
3332 SDValue Ops[] = {Chain,
3333 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3334 DAG.getTargetConstant(15, DL, MVT::i32),
3335 DAG.getTargetConstant(0, DL, MVT::i32),
3336 DAG.getTargetConstant(13, DL, MVT::i32),
3337 DAG.getTargetConstant(0, DL, MVT::i32),
3338 DAG.getTargetConstant(2, DL, MVT::i32)};
3339 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3340 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3341
3342 SDValue TEB = CurrentTEB.getValue(0);
3343 Chain = CurrentTEB.getValue(1);
3344
3345 // Load the ThreadLocalStoragePointer from the TEB
3346 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3347 SDValue TLSArray =
3348 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3349 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3350
3351 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3352 // offset into the TLSArray.
3353
3354 // Load the TLS index from the C runtime
3355 SDValue TLSIndex =
3356 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3357 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3358 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3359
// Index the TLS array: slot = TLSIndex << 2 (i.e. scaled by pointer size).
3360 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3361 DAG.getConstant(2, DL, MVT::i32));
3362 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3363 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3364 MachinePointerInfo());
3365
3366 // Get the offset of the start of the .tls section (section base)
// NOTE(review): the MachinePointerInfo argument of this load (original line
// 3373) is not visible in this extract.
3367 const auto *GA = cast<GlobalAddressSDNode>(Op);
3368 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3369 SDValue Offset = DAG.getLoad(
3370 PtrVT, DL, Chain,
3371 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3372 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3374
3375 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3376}
3377
3378// Lower ISD::GlobalTLSAddress using the "general dynamic" model
// Builds the TLSGD descriptor address (constant-pool entry + PIC add) and
// calls __tls_get_addr with it; the call's result is the variable's address.
3379SDValue
3380ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3381 SelectionDAG &DAG) const {
3382 SDLoc dl(GA);
3383 EVT PtrVT = getPointerTy(DAG.getDataLayout());
// PC fixup for the PIC add: PC reads ahead by 4 in Thumb, 8 in ARM mode.
3384 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3385 MachineFunction &MF = DAG.getMachineFunction();
3386 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3387 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3388 ARMConstantPoolValue *CPV =
3389 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3390 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
// NOTE(review): the MachinePointerInfo argument of this load (original line
// 3395) and the Args declaration (line 3402) are not visible in this extract.
3391 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3392 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3393 Argument = DAG.getLoad(
3394 PtrVT, dl, DAG.getEntryNode(), Argument,
3396 SDValue Chain = Argument.getValue(1);
3397
3398 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3399 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3400
3401 // call __tls_get_addr.
3403 Args.emplace_back(Argument, Type::getInt32Ty(*DAG.getContext()));
3404
3405 // FIXME: is there useful debug info available here?
3406 TargetLowering::CallLoweringInfo CLI(DAG);
3407 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3409 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3410
3411 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3412 return CallResult.first;
3413}
3414
3415// Lower ISD::GlobalTLSAddress using the "initial exec" or
3416// "local exec" model.
// Both models compute: thread pointer + per-variable offset. Initial-exec
// loads the offset indirectly (GOT-style, via a PIC-adjusted constant-pool
// entry); local-exec loads the offset directly from the constant pool.
3417SDValue
3418ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3419 SelectionDAG &DAG,
3420 TLSModel::Model model) const {
3421 const GlobalValue *GV = GA->getGlobal();
3422 SDLoc dl(GA);
// NOTE(review): the declaration of Offset (original line 3423) and several
// load-argument lines (3437, 3443, 3451, 3456, 3461) are elided here.
3424 SDValue Chain = DAG.getEntryNode();
3425 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3426 // Get the Thread Pointer
3427 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3428
3429 if (model == TLSModel::InitialExec) {
3430 MachineFunction &MF = DAG.getMachineFunction();
3431 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3432 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3433 // Initial exec model.
// PC fixup for the PIC add: PC reads ahead by 4 in Thumb, 8 in ARM mode.
3434 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3435 ARMConstantPoolValue *CPV =
3436 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3438 true);
3439 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3440 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3441 Offset = DAG.getLoad(
3442 PtrVT, dl, Chain, Offset,
3444 Chain = Offset.getValue(1);
3445
3446 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3447 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3448
// Second load: dereference the GOT slot to get the actual TP offset.
3449 Offset = DAG.getLoad(
3450 PtrVT, dl, Chain, Offset,
3452 } else {
3453 // local exec model
3454 assert(model == TLSModel::LocalExec);
3455 ARMConstantPoolValue *CPV =
3457 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3458 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3459 Offset = DAG.getLoad(
3460 PtrVT, dl, Chain, Offset,
3462 }
3463
3464 // The address of the thread local variable is the add of the thread
3465 // pointer with the offset of the variable.
3466 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3467}
3468
// Top-level TLS dispatcher: routes to emulated TLS, the Darwin scheme, the
// Windows scheme, or (for ELF) the model-specific lowering chosen by the
// target machine.
3469SDValue
3470ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3471 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3472 if (DAG.getTarget().useEmulatedTLS())
3473 return LowerToTLSEmulatedModel(GA, DAG);
3474
3475 if (Subtarget->isTargetDarwin())
3476 return LowerGlobalTLSAddressDarwin(Op, DAG);
3477
3478 if (Subtarget->isTargetWindows())
3479 return LowerGlobalTLSAddressWindows(Op, DAG);
3480
3481 // TODO: implement the "local dynamic" model
3482 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
// NOTE(review): the model query (original line 3483) and the case labels of
// this switch (lines 3486-3487 and 3489-3490) are not visible in this
// extract; the two returns handle general-dynamic and initial/local-exec.
3485 switch (model) {
3488 return LowerToTLSGeneralDynamicModel(GA, DAG);
3491 return LowerToTLSExecModels(GA, DAG, model);
3492 }
3493 llvm_unreachable("bogus TLS model");
3494}
3495
3496/// Return true if all users of V are within function F, looking through
3497/// ConstantExprs.
3498static bool allUsersAreInFunction(const Value *V, const Function *F) {
3499 SmallVector<const User*,4> Worklist(V->users());
3500 while (!Worklist.empty()) {
3501 auto *U = Worklist.pop_back_val();
3502 if (isa<ConstantExpr>(U)) {
3503 append_range(Worklist, U->users());
3504 continue;
3505 }
3506
3507 auto *I = dyn_cast<Instruction>(U);
3508 if (!I || I->getParent()->getParent() != F)
3509 return false;
3510 }
3511 return true;
3512}
3513
// Try to emit a small, local, constant, unnamed_addr global directly into the
// constant pool instead of referencing it indirectly. Returns the wrapped
// constant-pool address on success, or an empty SDValue to decline.
// NOTE(review): the first line of this static helper's signature (original
// line 3514, taking the ARMTargetLowering *TLI) is not visible here, nor are
// lines 3532-3533 (fast-isel bail-out condition), 3566 (MF), 3575 (MaxTotal),
// 3592/3596 (padding vector decl and re-Create), and 3602-3603 (bookkeeping).
3515 const GlobalValue *GV, SelectionDAG &DAG,
3516 EVT PtrVT, const SDLoc &dl) {
3517 // If we're creating a pool entry for a constant global with unnamed address,
3518 // and the global is small enough, we can emit it inline into the constant pool
3519 // to save ourselves an indirection.
3520 //
3521 // This is a win if the constant is only used in one function (so it doesn't
3522 // need to be duplicated) or duplicating the constant wouldn't increase code
3523 // size (implying the constant is no larger than 4 bytes).
3524 const Function &F = DAG.getMachineFunction().getFunction();
3525
3526 // We rely on this decision to inline being idempotent and unrelated to the
3527 // use-site. We know that if we inline a variable at one use site, we'll
3528 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3529 // doesn't know about this optimization, so bail out if it's enabled else
3530 // we could decide to inline here (and thus never emit the GV) but require
3531 // the GV from fast-isel generated code.
3534 return SDValue();
3535
// Only promote internal, constant, unnamed_addr globals with an initializer.
3536 auto *GVar = dyn_cast<GlobalVariable>(GV);
3537 if (!GVar || !GVar->hasInitializer() ||
3538 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3539 !GVar->hasLocalLinkage())
3540 return SDValue();
3541
3542 // If we inline a value that contains relocations, we move the relocations
3543 // from .data to .text. This is not allowed in position-independent code.
3544 auto *Init = GVar->getInitializer();
3545 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3546 Init->needsDynamicRelocation())
3547 return SDValue();
3548
3549 // The constant islands pass can only really deal with alignment requests
3550 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3551 // any type wanting greater alignment requirements than 4 bytes. We also
3552 // can only promote constants that are multiples of 4 bytes in size or
3553 // are paddable to a multiple of 4. Currently we only try and pad constants
3554 // that are strings for simplicity.
3555 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3556 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3557 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3558 unsigned RequiredPadding = 4 - (Size % 4);
3559 bool PaddingPossible =
3560 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3561 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3562 Size == 0)
3563 return SDValue();
3564
3565 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3567 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3568
3569 // We can't bloat the constant pool too much, else the ConstantIslands pass
3570 // may fail to converge. If we haven't promoted this global yet (it may have
3571 // multiple uses), and promoting it would increase the constant pool size (Sz
3572 // > 4), ensure we have space to do so up to MaxTotal.
3573 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3574 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3576 return SDValue();
3577
3578 // This is only valid if all users are in a single function; we can't clone
3579 // the constant in general. The LLVM IR unnamed_addr allows merging
3580 // constants, but not cloning them.
3581 //
3582 // We could potentially allow cloning if we could prove all uses of the
3583 // constant in the current function don't care about the address, like
3584 // printf format strings. But that isn't implemented for now.
3585 if (!allUsersAreInFunction(GVar, &F))
3586 return SDValue();
3587
3588 // We're going to inline this global. Pad it out if needed.
3589 if (RequiredPadding != 4) {
// Only string initializers reach here (checked above); copy the bytes and
// append zero padding up to the next 4-byte multiple.
3590 StringRef S = CDAInit->getAsString();
3591
3593 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3594 while (RequiredPadding--)
3595 V.push_back(0);
3597 }
3598
3599 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3600 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3601 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3604 PaddedSize - 4);
3605 }
3606 ++NumConstpoolPromoted;
3607 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3608}
3609
// Returns true if GV (after resolving aliases) is a constant global variable
// or a function — i.e. data that can live in a read-only section.
// NOTE(review): the signature line of this static helper (original line 3610)
// is not visible in this extract.
3611 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
// An alias with no resolvable aliasee cannot be proven read-only.
3612 if (!(GV = GA->getAliaseeObject()))
3613 return false;
3614 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3615 return V->isConstant();
3616 return isa<Function>(GV);
3617}
3618
3619SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3620 SelectionDAG &DAG) const {
3621 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3622 default: llvm_unreachable("unknown object format");
3623 case Triple::COFF:
3624 return LowerGlobalAddressWindows(Op, DAG);
3625 case Triple::ELF:
3626 return LowerGlobalAddressELF(Op, DAG);
3627 case Triple::MachO:
3628 return LowerGlobalAddressDarwin(Op, DAG);
3629 }
3630}
3631
// Lower a global address for ELF targets. Order of strategies: inline small
// constants into the constant pool; PIC (GOT-indirect for non-dso-local);
// ROPI PC-relative for read-only globals; RWPI SB(R9)-relative for writable
// globals; otherwise movw/movt or a constant-pool load.
// NOTE(review): several lines are elided in this extract (the G declaration
// at 3645, MachinePointerInfo arguments at 3651/3672/3695, and the CPV
// creation at 3667).
3632SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3633 SelectionDAG &DAG) const {
3634 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3635 SDLoc dl(Op);
3636 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3637 bool IsRO = isReadOnly(GV);
3638
3639 // promoteToConstantPool only if not generating XO text section
3640 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3641 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3642 return V;
3643
3644 if (isPositionIndependent()) {
// Non-dso-local globals go through the GOT (MO_GOT flag + extra load).
3646 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3647 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3648 if (!GV->isDSOLocal())
3649 Result =
3650 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3652 return Result;
3653 } else if (Subtarget->isROPI() && IsRO) {
3654 // PC-relative.
3655 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3656 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3657 return Result;
3658 } else if (Subtarget->isRWPI() && !IsRO) {
3659 // SB-relative.
3660 SDValue RelAddr;
3661 if (Subtarget->useMovt()) {
3662 ++NumMovwMovt;
3663 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3664 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3665 } else { // use literal pool for address constant
3666 ARMConstantPoolValue *CPV =
3668 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3669 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3670 RelAddr = DAG.getLoad(
3671 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3673 }
// R9 is the static base (SB) register under RWPI.
3674 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3675 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3676 return Result;
3677 }
3678
3679 // If we have T2 ops, we can materialize the address directly via movt/movw
3680 // pair. This is always cheaper. If need to generate Execute Only code, and we
3681 // only have Thumb1 available, we can't use a constant pool and are forced to
3682 // use immediate relocations.
3683 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3684 if (Subtarget->useMovt())
3685 ++NumMovwMovt;
3686 // FIXME: Once remat is capable of dealing with instructions with register
3687 // operands, expand this into two nodes.
3688 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3689 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3690 } else {
3691 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3692 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3693 return DAG.getLoad(
3694 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3696 }
3697}
3698
// Lower a GlobalAddress node for Darwin targets. Materializes the address
// with a (possibly PIC) wrapper node, and for indirect (non-lazy-pointer)
// symbols adds a load through the stub.
3699SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3700                                                    SelectionDAG &DAG) const {
3701  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3702         "ROPI/RWPI not currently supported for Darwin");
3703  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3704  SDLoc dl(Op);
3705  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3706
  // Statistics only: count addresses materialized via movw/movt pairs.
3707  if (Subtarget->useMovt())
3708    ++NumMovwMovt;
3709
3710  // FIXME: Once remat is capable of dealing with instructions with register
3711  // operands, expand this into multiple nodes
3712  unsigned Wrapper =
3713      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3714
3715  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3716  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3717
  // Indirect symbols hold the real address in a non-lazy pointer; load it.
3718  if (Subtarget->isGVIndirectSymbol(GV))
3719    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3721  return Result;
3722}
3723
// Lower a GlobalAddress node for Windows-on-ARM (COFF). Always uses a
// movw/movt pair; dllimport and non-DSO-local symbols get an extra load
// through the import table / COFF stub.
3724SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3725                                                     SelectionDAG &DAG) const {
3726  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3727  assert(Subtarget->useMovt() &&
3728         "Windows on ARM expects to use movw/movt");
3729  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3730         "ROPI/RWPI not currently supported for Windows");
3731
3732  const TargetMachine &TM = getTargetMachine();
3733  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  // Pick the relocation flavour: dllimport pointer, COFF stub for
  // non-DSO-local symbols, or a direct reference otherwise.
3734  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3735  if (GV->hasDLLImportStorageClass())
3736    TargetFlags = ARMII::MO_DLLIMPORT;
3737  else if (!TM.shouldAssumeDSOLocal(GV))
3738    TargetFlags = ARMII::MO_COFFSTUB;
3739  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3741  SDLoc DL(Op);
3742
3743  ++NumMovwMovt;
3744
3745  // FIXME: Once remat is capable of dealing with instructions with register
3746  // operands, expand this into two nodes.
3747  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3748                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3749                                                  TargetFlags));
  // Indirect flavours point at a slot that holds the real address.
3750  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3751    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3753  return Result;
3754}
3755
3756SDValue
3757ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3758 SDLoc dl(Op);
3759 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3760 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3761 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3762 Op.getOperand(1), Val);
3763}
3764
3765SDValue
3766ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3767 SDLoc dl(Op);
3768 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3769 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3770}
3771
3772SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3773 SelectionDAG &DAG) const {
3774 SDLoc dl(Op);
3775 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3776 Op.getOperand(0));
3777}
3778
// Lower INTRINSIC_VOID nodes. Currently only arm_gnu_eabi_mcount gets
// custom lowering: it is turned into a call to "\01__gnu_mcount_nc" that
// additionally pushes LR (tBL_PUSHLR / BL_PUSHLR pseudo-instructions).
3779SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3780    SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
  // The intrinsic ID is operand 0, or operand 1 if operand 0 is a chain.
3781  unsigned IntNo =
3782      Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
3783  switch (IntNo) {
3784  default:
3785    return SDValue(); // Don't custom lower most intrinsics.
3786  case Intrinsic::arm_gnu_eabi_mcount: {
3787    MachineFunction &MF = DAG.getMachineFunction();
3788    EVT PtrVT = getPointerTy(DAG.getDataLayout());
3789    SDLoc dl(Op);
3790    SDValue Chain = Op.getOperand(0);
3791    // call "\01__gnu_mcount_nc"
3792    const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3793    const uint32_t *Mask =
3795    assert(Mask && "Missing call preserved mask for calling convention");
3796    // Mark LR an implicit live-in.
3797    Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3798    SDValue ReturnAddress =
3799        DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3800    constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
3801    SDValue Callee =
3802        DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
    // Thumb needs an explicit predicate (AL) and a zero predicate register
    // operand; the ARM-mode form takes neither.
3804    if (Subtarget->isThumb())
3805      return SDValue(
3806          DAG.getMachineNode(
3807              ARM::tBL_PUSHLR, dl, ResultTys,
3808              {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3809               DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3810          0);
3811    return SDValue(
3812        DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3813                           {ReturnAddress, Callee, RegisterMask, Chain}),
3814        0);
3815  }
3816  }
3817}
3818
// Lower chainless ARM intrinsics. Most are expanded into generic ISD nodes
// (ABS/ABDS/CTLZ/UMIN/FMINNUM/...) or into ARM-specific DAG nodes
// (VMULL, VTBL, MVE predicate casts, long shifts).
3819SDValue
3820ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3821                                           const ARMSubtarget *Subtarget) const {
3822  unsigned IntNo = Op.getConstantOperandVal(0);
3823  SDLoc dl(Op);
3824  switch (IntNo) {
3825  default: return SDValue();    // Don't custom lower most intrinsics.
3826  case Intrinsic::thread_pointer: {
3827    EVT PtrVT = getPointerTy(DAG.getDataLayout());
3828    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3829  }
3830  case Intrinsic::arm_cls: {
    // Expand cls(x) as ctlz of ((((x >>s 31) ^ x) << 1) | 1): the xor folds
    // sign into leading-bit form, the "| 1" guarantees a non-zero ctlz input.
3831    const SDValue &Operand = Op.getOperand(1);
3832    const EVT VTy = Op.getValueType();
3833    SDValue SRA =
3834        DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
3835    SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
3836    SDValue SHL =
3837        DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
3838    SDValue OR =
3839        DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
3840    SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
3841    return Result;
3842  }
3843  case Intrinsic::arm_cls64: {
3844    // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
3845    //          else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
3846    const SDValue &Operand = Op.getOperand(1);
3847    const EVT VTy = Op.getValueType();
3848    SDValue Lo, Hi;
3849    std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
3850    SDValue Constant0 = DAG.getConstant(0, dl, VTy);
3851    SDValue Constant1 = DAG.getConstant(1, dl, VTy);
3852    SDValue Constant31 = DAG.getConstant(31, dl, VTy);
3853    SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
3854    SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
3855    SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
3856    SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
3857    SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
3858    SDValue CheckLo =
3859        DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
3860    SDValue HiIsZero =
3861        DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
3862    SDValue AdjustedLo =
3863        DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
3864    SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
3865    SDValue Result =
3866        DAG.getSelect(dl, VTy, CheckLo,
3867                      DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
3868    return Result;
3869  }
3870  case Intrinsic::eh_sjlj_lsda: {
    // Load the LSDA address from the constant pool; in PIC mode, add the
    // PC-relative label afterwards.
3871    MachineFunction &MF = DAG.getMachineFunction();
3872    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3873    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3874    EVT PtrVT = getPointerTy(DAG.getDataLayout());
3875    SDValue CPAddr;
3876    bool IsPositionIndependent = isPositionIndependent();
    // PC bias differs between Thumb (4) and ARM (8) pipelines.
3877    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3878    ARMConstantPoolValue *CPV =
3879      ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3880                                      ARMCP::CPLSDA, PCAdj);
3881    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3882    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3883    SDValue Result = DAG.getLoad(
3884        PtrVT, dl, DAG.getEntryNode(), CPAddr,
3886
3887    if (IsPositionIndependent) {
3888      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3889      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3890    }
3891    return Result;
3892  }
3893  case Intrinsic::arm_neon_vabs:
3894    return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3895                       Op.getOperand(1));
3896  case Intrinsic::arm_neon_vabds:
3897    if (Op.getValueType().isInteger())
3898      return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
3899                         Op.getOperand(1), Op.getOperand(2));
3900    return SDValue();
3901  case Intrinsic::arm_neon_vabdu:
3902    return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
3903                       Op.getOperand(1), Op.getOperand(2));
3904  case Intrinsic::arm_neon_vmulls:
3905  case Intrinsic::arm_neon_vmullu: {
3906    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3907      ? ARMISD::VMULLs : ARMISD::VMULLu;
3908    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3909                       Op.getOperand(1), Op.getOperand(2));
3910  }
3911  case Intrinsic::arm_neon_vminnm:
3912  case Intrinsic::arm_neon_vmaxnm: {
3913    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3914      ? ISD::FMINNUM : ISD::FMAXNUM;
3915    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3916                       Op.getOperand(1), Op.getOperand(2));
3917  }
3918  case Intrinsic::arm_neon_vminu:
3919  case Intrinsic::arm_neon_vmaxu: {
3920    if (Op.getValueType().isFloatingPoint())
3921      return SDValue();
3922    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3923      ? ISD::UMIN : ISD::UMAX;
3924    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3925                       Op.getOperand(1), Op.getOperand(2));
3926  }
3927  case Intrinsic::arm_neon_vmins:
3928  case Intrinsic::arm_neon_vmaxs: {
3929    // v{min,max}s is overloaded between signed integers and floats.
3930    if (!Op.getValueType().isFloatingPoint()) {
3931      unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3932        ? ISD::SMIN : ISD::SMAX;
3933      return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3934                         Op.getOperand(1), Op.getOperand(2));
3935    }
3936    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3937      ? ISD::FMINIMUM : ISD::FMAXIMUM;
3938    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3939                       Op.getOperand(1), Op.getOperand(2));
3940  }
3941  case Intrinsic::arm_neon_vtbl1:
3942    return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3943                       Op.getOperand(1), Op.getOperand(2));
3944  case Intrinsic::arm_neon_vtbl2:
3945    return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3946                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3947  case Intrinsic::arm_mve_pred_i2v:
3948  case Intrinsic::arm_mve_pred_v2i:
3949    return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
3950                       Op.getOperand(1));
3951  case Intrinsic::arm_mve_vreinterpretq:
3952    return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
3953                       Op.getOperand(1));
3954  case Intrinsic::arm_mve_lsll:
3955    return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
3956                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3957  case Intrinsic::arm_mve_asrl:
3958    return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
3959                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3960  }
3961}
3962
// (Body of LowerATOMIC_FENCE; signature line not visible in this view.)
// Single-thread fences need no barrier; targets without DMB fall back to the
// v6 mcr-based barrier; otherwise emit an arm.dmb intrinsic with a domain
// chosen from the subtarget and requested atomic ordering.
3964                            const ARMSubtarget *Subtarget) {
3965  SDLoc dl(Op);
  // Operand 2 carries the synchronization scope ID.
3966  auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3967  if (SSID == SyncScope::SingleThread)
3968    return Op;
3969
3970  if (!Subtarget->hasDataBarrier()) {
3971    // Some ARMv6 cpus can support data barriers with an mcr instruction.
3972    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3973    // here.
3974    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3975           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3976    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3977                       DAG.getConstant(0, dl, MVT::i32));
3978  }
3979
  // Operand 1 carries the atomic ordering of the fence.
3980  AtomicOrdering Ord =
3981      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
3983  if (Subtarget->isMClass()) {
3984    // Only a full system barrier exists in the M-class architectures.
3986  } else if (Subtarget->preferISHSTBarriers() &&
3987             Ord == AtomicOrdering::Release) {
3988    // Swift happens to implement ISHST barriers in a way that's compatible with
3989    // Release semantics but weaker than ISH so we'd be fools not to use
3990    // it. Beware: other processors probably don't!
3992  }
3993
3994  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3995                     DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3996                     DAG.getConstant(Domain, dl, MVT::i32));
3997}
3998
// (Body of LowerPREFETCH; signature line not visible in this view.)
// Lowers ISD::PREFETCH to ARMISD::PRELOAD, dropping the prefetch entirely
// (just forwarding the chain) when the subtarget lacks the instruction.
4000                            const ARMSubtarget *Subtarget) {
4001  // ARM pre v5TE and Thumb1 does not have preload instructions.
4002  if (!(Subtarget->isThumb2() ||
4003        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4004    // Just preserve the chain.
4005    return Op.getOperand(0);
4006
4007  SDLoc dl(Op);
  // Operand 2 is the rw flag (1 = write); invert it to get "isRead".
4008  unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4009  if (!isRead &&
4010      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4011    // ARMv7 with MP extension has PLDW.
4012    return Op.getOperand(0);
4013
  // Operand 4 distinguishes data (1) from instruction (0) prefetches.
4014  unsigned isData = Op.getConstantOperandVal(4);
4015  if (Subtarget->isThumb()) {
4016    // Invert the bits.
4017    isRead = ~isRead & 1;
4018    isData = ~isData & 1;
4019  }
4020
4021  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4022                     Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4023                     DAG.getConstant(isData, dl, MVT::i32));
4024}
4025
// (Body of LowerVASTART; signature line not visible in this view.)
4028  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4029
4030  // vastart just stores the address of the VarArgsFrameIndex slot into the
4031  // memory location argument.
4032  SDLoc dl(Op);
4034  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  // Operand 2 is the source Value of the va_list pointer (for alias info).
4035  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4036  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4037                      MachinePointerInfo(SV));
4038}
4039
// Reassemble an f64 formal argument from its two 32-bit halves. The first
// half (VA) is always in a register; the second (NextVA) may be in a
// register or spilled to the stack. The halves are combined with VMOVDRR,
// swapping them on big-endian subtargets.
4040SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4041                                                CCValAssign &NextVA,
4042                                                SDValue &Root,
4043                                                SelectionDAG &DAG,
4044                                                const SDLoc &dl) const {
4045  MachineFunction &MF = DAG.getMachineFunction();
4046  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4047
  // Thumb1 can only use the low registers for this.
4048  const TargetRegisterClass *RC;
4049  if (AFI->isThumb1OnlyFunction())
4050    RC = &ARM::tGPRRegClass;
4051  else
4052    RC = &ARM::GPRRegClass;
4053
4054  // Transform the arguments stored in physical registers into virtual ones.
4055  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4056  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4057
4058  SDValue ArgValue2;
4059  if (NextVA.isMemLoc()) {
4060    MachineFrameInfo &MFI = MF.getFrameInfo();
4061    int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4062
4063    // Create load node to retrieve arguments from the stack.
4064    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4065    ArgValue2 = DAG.getLoad(
4066        MVT::i32, dl, Root, FIN,
4068  } else {
4069    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4070    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4071  }
  // VMOVDRR expects (lo, hi) in little-endian order; swap for big-endian.
4072  if (!Subtarget->isLittle())
4073    std::swap (ArgValue, ArgValue2);
4074  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4075}
4076
4077// The remaining GPRs hold either the beginning of variable-argument
4078// data, or the beginning of an aggregate passed by value (usually
4079// byval). Either way, we allocate stack slots adjacent to the data
4080// provided by our caller, and store the unallocated registers there.
4081// If this is a variadic function, the va_list pointer will begin with
4082// these values; otherwise, this reassembles a (byval) structure that
4083// was split between registers and memory.
4084// Return: The frame index registers were stored into.
4085int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4086                                      const SDLoc &dl, SDValue &Chain,
4087                                      const Value *OrigArg,
4088                                      unsigned InRegsParamRecordIdx,
4089                                      int ArgOffset, unsigned ArgSize) const {
4090  // Currently, two use-cases possible:
4091  // Case #1. Non-var-args function, and we meet first byval parameter.
4092  //          Setup first unallocated register as first byval register;
4093  //          eat all remained registers
4094  //          (these two actions are performed by HandleByVal method).
4095  //          Then, here, we initialize stack frame with
4096  //          "store-reg" instructions.
4097  // Case #2. Var-args function, that doesn't contain byval parameters.
4098  //          The same: eat all remained unallocated registers,
4099  //          initialize stack frame.
4100
4101  MachineFunction &MF = DAG.getMachineFunction();
4102  MachineFrameInfo &MFI = MF.getFrameInfo();
4103  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Determine the register range [RBegin, REnd) to spill: either the byval
  // record's registers, or every still-unallocated GPR argument register.
4104  unsigned RBegin, REnd;
4105  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4106    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4107  } else {
4108    unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4109    RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4110    REnd = ARM::R4;
4111  }
4112
  // Registers are saved just below the CFA, at negative offsets.
4113  if (REnd != RBegin)
4114    ArgOffset = -4 * (ARM::R4 - RBegin);
4115
4116  auto PtrVT = getPointerTy(DAG.getDataLayout());
4117  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4118  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4119
4121  const TargetRegisterClass *RC =
4122      AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4123
  // Store each live-in register to consecutive 4-byte slots.
4124  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4125    Register VReg = MF.addLiveIn(Reg, RC);
4126    SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4127    SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4128                                 MachinePointerInfo(OrigArg, 4 * i));
4129    MemOps.push_back(Store);
4130    FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4131  }
4132
4133  if (!MemOps.empty())
4134    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4135  return FrameIndex;
4136}
4137
4138// Setup stack frame, the va_list pointer will start from.
4139void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4140 const SDLoc &dl, SDValue &Chain,
4141 unsigned ArgOffset,
4142 unsigned TotalArgRegsSaveSize,
4143 bool ForceMutable) const {
4144 MachineFunction &MF = DAG.getMachineFunction();
4145 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4146
4147 // Try to store any remaining integer argument regs
4148 // to their spots on the stack so that they may be loaded by dereferencing
4149 // the result of va_next.
4150 // If there is no regs to be stored, just point address after last
4151 // argument passed via stack.
4152 int FrameIndex = StoreByValRegs(
4153 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4154 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4155 AFI->setVarArgsFrameIndex(FrameIndex);
4156}
4157
4158bool ARMTargetLowering::splitValueIntoRegisterParts(
4159 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4160 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4161 EVT ValueVT = Val.getValueType();
4162 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4163 unsigned ValueBits = ValueVT.getSizeInBits();
4164 unsigned PartBits = PartVT.getSizeInBits();
4165 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4166 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4167 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4168 Parts[0] = Val;
4169 return true;
4170 }
4171 return false;
4172}
4173
4174SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4175 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4176 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4177 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4178 unsigned ValueBits = ValueVT.getSizeInBits();
4179 unsigned PartBits = PartVT.getSizeInBits();
4180 SDValue Val = Parts[0];
4181
4182 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4183 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4184 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4185 return Val;
4186 }
4187 return SDValue();
4188}
4189
// Lower the incoming (formal) arguments of a function: assign each argument
// a register or stack location via the calling convention, copy register
// arguments into virtual registers, create fixed stack objects and loads for
// stack arguments, and handle byval/variadic register spilling and CMSE
// entry-function restrictions.
4190SDValue ARMTargetLowering::LowerFormalArguments(
4191    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4192    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4193    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4194  MachineFunction &MF = DAG.getMachineFunction();
4195  MachineFrameInfo &MFI = MF.getFrameInfo();
4196
4197  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4198
4199  // Assign locations to all of the incoming arguments.
4201  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4202                 *DAG.getContext());
4203  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4204
4206  unsigned CurArgIdx = 0;
4207
4208  // Initially ArgRegsSaveSize is zero.
4209  // Then we increase this value each time we meet byval parameter.
4210  // We also increase this value in case of varargs function.
4211  AFI->setArgRegsSaveSize(0);
4212
4213  // Calculate the amount of stack space that we need to allocate to store
4214  // byval and variadic arguments that are passed in registers.
4215  // We need to know this before we allocate the first byval or variadic
4216  // argument, as they will be allocated a stack slot below the CFA (Canonical
4217  // Frame Address, the stack pointer at entry to the function).
4218  unsigned ArgRegBegin = ARM::R4;
4219  for (const CCValAssign &VA : ArgLocs) {
4220    if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4221      break;
4222
4223    unsigned Index = VA.getValNo();
4224    ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4225    if (!Flags.isByVal())
4226      continue;
4227
4228    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4229    unsigned RBegin, REnd;
4230    CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4231    ArgRegBegin = std::min(ArgRegBegin, RBegin);
4232
4233    CCInfo.nextInRegsParam();
4234  }
4235  CCInfo.rewindByValRegsInfo();
4236
4237  int lastInsIndex = -1;
  // For variadic functions, also account for the unallocated GPR argument
  // registers, which must be saved for va_arg.
4238  if (isVarArg && MFI.hasVAStart()) {
4239    unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4240    if (RegIdx != std::size(GPRArgRegs))
4241      ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4242  }
4243
4244  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4245  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4246  auto PtrVT = getPointerTy(DAG.getDataLayout());
4247
  // Main loop: materialize an SDValue for every assigned argument location.
4248  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4249    CCValAssign &VA = ArgLocs[i];
4250    if (Ins[VA.getValNo()].isOrigArg()) {
4251      std::advance(CurOrigArg,
4252                   Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4253      CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4254    }
4255    // Arguments stored in registers.
4256    if (VA.isRegLoc()) {
4257      EVT RegVT = VA.getLocVT();
4258      SDValue ArgValue;
4259
4260      if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4261        // f64 and vector types are split up into multiple registers or
4262        // combinations of registers and stack slots.
4263        SDValue ArgValue1 =
4264            GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4265        VA = ArgLocs[++i]; // skip ahead to next loc
4266        SDValue ArgValue2;
4267        if (VA.isMemLoc()) {
4268          int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4269          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4270          ArgValue2 = DAG.getLoad(
4271              MVT::f64, dl, Chain, FIN,
4273        } else {
4274          ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4275        }
4276        ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4277        ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4278                               ArgValue1, DAG.getIntPtrConstant(0, dl));
4279        ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4280                               ArgValue2, DAG.getIntPtrConstant(1, dl));
4281      } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4282        ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4283      } else {
        // Pick the register class matching the location type.
4284        const TargetRegisterClass *RC;
4285
4286        if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4287          RC = &ARM::HPRRegClass;
4288        else if (RegVT == MVT::f32)
4289          RC = &ARM::SPRRegClass;
4290        else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4291                 RegVT == MVT::v4bf16)
4292          RC = &ARM::DPRRegClass;
4293        else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4294                 RegVT == MVT::v8bf16)
4295          RC = &ARM::QPRRegClass;
4296        else if (RegVT == MVT::i32)
4297          RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4298                                           : &ARM::GPRRegClass;
4299        else
4300          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4301
4302        // Transform the arguments in physical registers into virtual ones.
4303        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4304        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4305
4306        // If this value is passed in r0 and has the returned attribute (e.g.
4307        // C++ 'structors), record this fact for later use.
4308        if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4309          AFI->setPreservesR0();
4310        }
4311      }
4312
4313      // If this is an 8 or 16-bit value, it is really passed promoted
4314      // to 32 bits.  Insert an assert[sz]ext to capture this, then
4315      // truncate to the right size.
4316      switch (VA.getLocInfo()) {
4317      default: llvm_unreachable("Unknown loc info!");
4318      case CCValAssign::Full: break;
4319      case CCValAssign::BCvt:
4320        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4321        break;
4322      }
4323
4324      // f16 arguments have their size extended to 4 bytes and passed as if they
4325      // had been copied to the LSBs of a 32-bit register.
4326      // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4327      if (VA.needsCustom() &&
4328          (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4329        ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4330
4331      // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4332      // less than 32 bits must be sign- or zero-extended in the callee for
4333      // security reasons. Although the ABI mandates an extension done by the
4334      // caller, the latter cannot be trusted to follow the rules of the ABI.
4335      const ISD::InputArg &Arg = Ins[VA.getValNo()];
4336      if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4337          RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4338        ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4339
4340      InVals.push_back(ArgValue);
4341    } else { // VA.isRegLoc()
4342      // Only arguments passed on the stack should make it here.
4343      assert(VA.isMemLoc());
4344      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4345
4346      int index = VA.getValNo();
4347
4348      // Some Ins[] entries become multiple ArgLoc[] entries.
4349      // Process them only once.
4350      if (index != lastInsIndex)
4351        {
4352          ISD::ArgFlagsTy Flags = Ins[index].Flags;
4353          // FIXME: For now, all byval parameter objects are marked mutable.
4354          // This can be changed with more analysis.
4355          // In case of tail call optimization mark all arguments mutable.
4356          // Since they could be overwritten by lowering of arguments in case of
4357          // a tail call.
4358          if (Flags.isByVal()) {
4359            assert(Ins[index].isOrigArg() &&
4360                   "Byval arguments cannot be implicit");
4361            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4362
4363            int FrameIndex = StoreByValRegs(
4364                CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4365                VA.getLocMemOffset(), Flags.getByValSize());
4366            InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4367            CCInfo.nextInRegsParam();
4368          } else if (VA.needsCustom() && (VA.getValVT() == MVT::f16 ||
4369                                          VA.getValVT() == MVT::bf16)) {
4370            // f16 and bf16 values are passed in the least-significant half of
4371            // a 4 byte stack slot. This is done as-if the extension was done
4372            // in a 32-bit register, so the actual bytes used for the value
4373            // differ between little and big endian.
4374            assert(VA.getLocVT().getSizeInBits() == 32);
4375            unsigned FIOffset = VA.getLocMemOffset();
4376            int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits() / 8,
4377                                           FIOffset, true);
4378
4379            SDValue Addr = DAG.getFrameIndex(FI, PtrVT);
4380            if (DAG.getDataLayout().isBigEndian())
4381              Addr = DAG.getObjectPtrOffset(dl, Addr, TypeSize::getFixed(2));
4382
4383            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, Addr,
4385                                             DAG.getMachineFunction(), FI)));
4386
4387          } else {
4388            unsigned FIOffset = VA.getLocMemOffset();
4389            int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4390                                           FIOffset, true);
4391
4392            // Create load nodes to retrieve arguments from the stack.
4393            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4394            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4396                                             DAG.getMachineFunction(), FI)));
4397          }
4398          lastInsIndex = index;
4399        }
4400    }
4401  }
4402
4403  // varargs
4404  if (isVarArg && MFI.hasVAStart()) {
4405    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4406                         TotalArgRegsSaveSize);
4407    if (AFI->isCmseNSEntryFunction()) {
4408      DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
4410          "secure entry function must not be variadic", dl.getDebugLoc()));
4411    }
4412  }
4413
4414  unsigned StackArgSize = CCInfo.getStackSize();
4415  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4416  if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4417    // The only way to guarantee a tail call is if the callee restores its
4418    // argument area, but it must also keep the stack aligned when doing so.
4419    MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
4420    assert(StackAlign && "data layout string is missing stack alignment");
4421    StackArgSize = alignTo(StackArgSize, *StackAlign);
4422
4423    AFI->setArgumentStackToRestore(StackArgSize);
4424  }
4425  AFI->setArgumentStackSize(StackArgSize);
4426
4427  if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4428    DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
4430        "secure entry function requires arguments on stack", dl.getDebugLoc()));
4431  }
4432
4433  return Chain;
4434}
4435
4436/// isFloatingPointZero - Return true if this is +0.0.
// (Signature and leading ConstantFPSDNode check not visible in this view.)
// Recognizes +0.0 directly, via a constant-pool load, or via the
// VMOVIMM-of-zero bitcast pattern emitted by LowerConstantFP().
4439    return CFP->getValueAPF().isPosZero();
4440  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4441    // Maybe this has already been legalized into the constant pool?
4442    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4443      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4445        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4446          return CFP->getValueAPF().isPosZero();
4447    }
4448  } else if (Op->getOpcode() == ISD::BITCAST &&
4449             Op->getValueType(0) == MVT::f64) {
4450    // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4451    // created by LowerConstantFP().
4452    SDValue BitcastOp = Op->getOperand(0);
4453    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4454        isNullConstant(BitcastOp->getOperand(0)))
4455      return true;
4456  }
4457  return false;
4458}
4459
4460/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4461/// the given operands.
4462SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4463 SDValue &ARMcc, SelectionDAG &DAG,
4464 const SDLoc &dl) const {
4465 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4466 unsigned C = RHSC->getZExtValue();
4467 if (!isLegalICmpImmediate((int32_t)C)) {
4468 // Constant does not fit, try adjusting it by one.
4469 switch (CC) {
4470 default: break;
4471 case ISD::SETLT:
4472 case ISD::SETGE:
4473 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4474 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4475 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4476 }
4477 break;
4478 case ISD::SETULT:
4479 case ISD::SETUGE:
4480 if (C != 0 && isLegalICmpImmediate(C-1)) {
4481 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4482 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4483 }
4484 break;
4485 case ISD::SETLE:
4486 case ISD::SETGT:
4487 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4488 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4489 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4490 }
4491 break;
4492 case ISD::SETULE:
4493 case ISD::SETUGT:
4494 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4495 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4496 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4497 }
4498 break;
4499 }
4500 }
4501 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4503 // In ARM and Thumb-2, the compare instructions can shift their second
4504 // operand.
4506 std::swap(LHS, RHS);
4507 }
4508
4509 // Thumb1 has very limited immediate modes, so turning an "and" into a
4510 // shift can save multiple instructions.
4511 //
4512 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4513 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4514 // own. If it's the operand to an unsigned comparison with an immediate,
4515 // we can eliminate one of the shifts: we transform
4516 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4517 //
4518 // We avoid transforming cases which aren't profitable due to encoding
4519 // details:
4520 //
4521 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4522 // would not; in that case, we're essentially trading one immediate load for
4523 // another.
4524 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4525 // 3. C2 is zero; we have other code for this special case.
4526 //
4527 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4528 // instruction, since the AND is always one instruction anyway, but we could
4529 // use narrow instructions in some cases.
4530 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4531 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4532 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4533 !isSignedIntSetCC(CC)) {
4534 unsigned Mask = LHS.getConstantOperandVal(1);
4535 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4536 uint64_t RHSV = RHSC->getZExtValue();
4537 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4538 unsigned ShiftBits = llvm::countl_zero(Mask);
4539 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4540 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4541 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4542 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4543 }
4544 }
4545 }
4546
4547 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4548 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4549 // way a cmp would.
4550 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4551 // some tweaks to the heuristics for the previous and->shift transform.
4552 // FIXME: Optimize cases where the LHS isn't a shift.
4553 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4554 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4555 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4556 LHS.getConstantOperandVal(1) < 31) {
4557 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4558 SDValue Shift =
4559 DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
4560 LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
4561 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4562 return Shift.getValue(1);
4563 }
4564
4566
4567 // If the RHS is a constant zero then the V (overflow) flag will never be
4568 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4569 // simpler for other passes (like the peephole optimiser) to deal with.
4570 if (isNullConstant(RHS)) {
4571 switch (CondCode) {
4572 default: break;
4573 case ARMCC::GE:
4575 break;
4576 case ARMCC::LT:
4578 break;
4579 }
4580 }
4581
4582 unsigned CompareType;
4583 switch (CondCode) {
4584 default:
4585 CompareType = ARMISD::CMP;
4586 break;
4587 case ARMCC::EQ:
4588 case ARMCC::NE:
4589 // Uses only Z Flag
4590 CompareType = ARMISD::CMPZ;
4591 break;
4592 }
4593 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4594 return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
4595}
4596
4597/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4598SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4599 SelectionDAG &DAG, const SDLoc &dl,
4600 bool Signaling) const {
4601 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4602 SDValue Flags;
4604 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
4605 LHS, RHS);
4606 else
4607 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4608 FlagsVT, LHS);
4609 return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
4610}
4611
// This function returns three things: the arithmetic computation itself
// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
// comparison and the condition code define the case in which the arithmetic
// computation *does not* overflow.
std::pair<SDValue, SDValue>
ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                 SDValue &ARMcc) const {
  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");

  SDValue Value, OverflowCmp;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);

  // FIXME: We are currently always generating CMPs because we don't support
  // generating CMN through the backend. This is not as good as the natural
  // CMP case because it causes a register dependency and cannot be folded
  // later.

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    // ARMCC::VC = V (overflow) flag clear, i.e. no signed overflow.
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
    // Comparing the sum against one addend re-derives the add's flags.
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
    break;
  case ISD::UADDO:
    // ARMCC::HS (carry set / unsigned >=): Value >= LHS means no wrap.
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    // We use ADDC here to correspond to its use in LowerUnsignedALUO.
    // We do not use it in the USUBO case as Value may not be used.
    Value = DAG.getNode(ARMISD::ADDC, dl,
                        DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
                .getValue(0);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
    break;
  case ISD::SSUBO:
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    // The compare itself computes LHS - RHS, so its flags describe the SUB.
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
    break;
  case ISD::USUBO:
    // ARMCC::HS here means no borrow: LHS >= RHS unsigned.
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
    break;
  case ISD::UMULO:
    // We generate a UMUL_LOHI and then check if the high word is 0.
    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    Value = DAG.getNode(ISD::UMUL_LOHI, dl,
                        DAG.getVTList(Op.getValueType(), Op.getValueType()),
                        LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
                              DAG.getConstant(0, dl, MVT::i32));
    Value = Value.getValue(0); // We only want the low 32 bits for the result.
    break;
  case ISD::SMULO:
    // We generate a SMUL_LOHI and then check if all the bits of the high word
    // are the same as the sign bit of the low word.
    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    Value = DAG.getNode(ISD::SMUL_LOHI, dl,
                        DAG.getVTList(Op.getValueType(), Op.getValueType()),
                        LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
                              DAG.getNode(ISD::SRA, dl, Op.getValueType(),
                                          Value.getValue(0),
                                          DAG.getConstant(31, dl, MVT::i32)));
    Value = Value.getValue(0); // We only want the low 32 bits for the result.
    break;
  } // switch (...)

  return std::make_pair(Value, OverflowCmp);
}
4685
4686SDValue
4687ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4688 // Let legalize expand this if it isn't a legal type yet.
4689 if (!isTypeLegal(Op.getValueType()))
4690 return SDValue();
4691
4692 SDValue Value, OverflowCmp;
4693 SDValue ARMcc;
4694 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4695 SDLoc dl(Op);
4696 // We use 0 and 1 as false and true values.
4697 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4698 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4699 EVT VT = Op.getValueType();
4700
4701 SDValue Overflow =
4702 DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
4703
4704 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4705 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4706}
4707
4709 SelectionDAG &DAG) {
4710 SDLoc DL(BoolCarry);
4711 EVT CarryVT = BoolCarry.getValueType();
4712
4713 // This converts the boolean value carry into the carry flag by doing
4714 // ARMISD::SUBC Carry, 1
4715 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4716 DAG.getVTList(CarryVT, MVT::i32),
4717 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4718 return Carry.getValue(1);
4719}
4720
4722 SelectionDAG &DAG) {
4723 SDLoc DL(Flags);
4724
4725 // Now convert the carry flag into a boolean carry. We do this
4726 // using ARMISD:ADDE 0, 0, Carry
4727 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4728 DAG.getConstant(0, DL, MVT::i32),
4729 DAG.getConstant(0, DL, MVT::i32), Flags);
4730}
4731
/// Lower UADDO/USUBO to the flag-producing ARMISD::ADDC/SUBC nodes plus a
/// boolean overflow value, merged into the node's two results.
SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Let legalize expand this if it isn't a legal type yet.
  if (!isTypeLegal(Op.getValueType()))
    return SDValue();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);

  EVT VT = Op.getValueType();
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Value;
  SDValue Overflow;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::UADDO:
    Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
    // Convert the carry flag into a boolean value.
    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
    break;
  case ISD::USUBO: {
    Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
    // Convert the carry flag into a boolean value.
    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
    // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
    // value. So compute 1 - C.
    Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
                           DAG.getConstant(1, dl, MVT::i32), Overflow);
    break;
  }
  }

  // Result 0 is the arithmetic value, result 1 the 0/1 overflow bit.
  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
4768
4770 const ARMSubtarget *Subtarget) {
4771 EVT VT = Op.getValueType();
4772 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
4773 return SDValue();
4774 if (!VT.isSimple())
4775 return SDValue();
4776
4777 unsigned NewOpcode;
4778 switch (VT.getSimpleVT().SimpleTy) {
4779 default:
4780 return SDValue();
4781 case MVT::i8:
4782 switch (Op->getOpcode()) {
4783 case ISD::UADDSAT:
4784 NewOpcode = ARMISD::UQADD8b;
4785 break;
4786 case ISD::SADDSAT:
4787 NewOpcode = ARMISD::QADD8b;
4788 break;
4789 case ISD::USUBSAT:
4790 NewOpcode = ARMISD::UQSUB8b;
4791 break;
4792 case ISD::SSUBSAT:
4793 NewOpcode = ARMISD::QSUB8b;
4794 break;
4795 }
4796 break;
4797 case MVT::i16:
4798 switch (Op->getOpcode()) {
4799 case ISD::UADDSAT:
4800 NewOpcode = ARMISD::UQADD16b;
4801 break;
4802 case ISD::SADDSAT:
4803 NewOpcode = ARMISD::QADD16b;
4804 break;
4805 case ISD::USUBSAT:
4806 NewOpcode = ARMISD::UQSUB16b;
4807 break;
4808 case ISD::SSUBSAT:
4809 NewOpcode = ARMISD::QSUB16b;
4810 break;
4811 }
4812 break;
4813 }
4814
4815 SDLoc dl(Op);
4816 SDValue Add =
4817 DAG.getNode(NewOpcode, dl, MVT::i32,
4818 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
4819 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
4820 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
4821}
4822
/// Lower ISD::SELECT. Recognises overflow-flag conditions and CMOV-of-0/1
/// conditions so the select can reuse the existing flags; otherwise falls
/// back to a select_cc against zero.
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  SDLoc dl(Op);
  unsigned Opc = Cond.getOpcode();

  // If the condition is the overflow result (result #1) of an arithmetic
  // overflow node, select directly on the overflow comparison's flags.
  if (Cond.getResNo() == 1 &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO)) {
    if (!isTypeLegal(Cond->getValueType(0)))
      return SDValue();

    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
    EVT VT = Op.getValueType();

    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
  }

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        // Inverted 0/1 CMOV: swap the select arms instead.
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode())
        return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
                       Cond.getOperand(3), DAG);
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
                     DAG.getConstant(1, dl, Cond.getValueType()));

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, dl, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}
4884
4886 bool &swpCmpOps, bool &swpVselOps) {
4887 // Start by selecting the GE condition code for opcodes that return true for
4888 // 'equality'
4889 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4890 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
4891 CondCode = ARMCC::GE;
4892
4893 // and GT for opcodes that return false for 'equality'.
4894 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4895 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
4896 CondCode = ARMCC::GT;
4897
4898 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4899 // to swap the compare operands.
4900 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4901 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
4902 swpCmpOps = true;
4903
4904 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4905 // If we have an unordered opcode, we need to swap the operands to the VSEL
4906 // instruction (effectively negating the condition).
4907 //
4908 // This also has the effect of swapping which one of 'less' or 'greater'
4909 // returns true, so we also swap the compare operands. It also switches
4910 // whether we return true for 'equality', so we compensate by picking the
4911 // opposite condition code to our original choice.
4912 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4913 CC == ISD::SETUGT) {
4914 swpCmpOps = !swpCmpOps;
4915 swpVselOps = !swpVselOps;
4916 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4917 }
4918
4919 // 'ordered' is 'anything but unordered', so use the VS condition code and
4920 // swap the VSEL operands.
4921 if (CC == ISD::SETO) {
4922 CondCode = ARMCC::VS;
4923 swpVselOps = true;
4924 }
4925
4926 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4927 // code and swap the VSEL operands. Also do this if we don't care about the
4928 // unordered case.
4929 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
4930 CondCode = ARMCC::EQ;
4931 swpVselOps = true;
4932 }
4933}
4934
/// Build an ARMISD::CMOV selecting between \p FalseVal and \p TrueVal on
/// condition \p ARMcc over \p Flags. When the target lacks hardware f64
/// (hasFP64() is false) an f64 CMOV is split into two i32 CMOVs on the
/// register halves, rebuilt with VMOVDRR.
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
                                   SDValue TrueVal, SDValue ARMcc,
                                   SDValue Flags, SelectionDAG &DAG) const {
  if (!Subtarget->hasFP64() && VT == MVT::f64) {
    // Split each f64 into its two i32 halves (VMOVRRD -> lo, hi).
    FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                           DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
    TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                          DAG.getVTList(MVT::i32, MVT::i32), TrueVal);

    SDValue TrueLow = TrueVal.getValue(0);
    SDValue TrueHigh = TrueVal.getValue(1);
    SDValue FalseLow = FalseVal.getValue(0);
    SDValue FalseHigh = FalseVal.getValue(1);

    // Select each half independently on the same flags and condition.
    SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
                              ARMcc, Flags);
    SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
                               ARMcc, Flags);

    // Reassemble the selected halves into an f64.
    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
  }
  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
}
4958
4959static bool isGTorGE(ISD::CondCode CC) {
4960 return CC == ISD::SETGT || CC == ISD::SETGE;
4961}
4962
4963static bool isLTorLE(ISD::CondCode CC) {
4964 return CC == ISD::SETLT || CC == ISD::SETLE;
4965}
4966
4967// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4968// All of these conditions (and their <= and >= counterparts) will do:
4969// x < k ? k : x
4970// x > k ? x : k
4971// k < x ? x : k
4972// k > x ? k : x
4973static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4974 const SDValue TrueVal, const SDValue FalseVal,
4975 const ISD::CondCode CC, const SDValue K) {
4976 return (isGTorGE(CC) &&
4977 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4978 (isLTorLE(CC) &&
4979 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4980}
4981
4982// Check if two chained conditionals could be converted into SSAT or USAT.
4983//
4984// SSAT can replace a set of two conditional selectors that bound a number to an
4985// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4986//
4987// x < -k ? -k : (x > k ? k : x)
4988// x < -k ? -k : (x < k ? x : k)
4989// x > -k ? (x > k ? k : x) : -k
4990// x < k ? (x < -k ? -k : x) : k
4991// etc.
4992//
4993// LLVM canonicalizes these to either a min(max()) or a max(min())
4994// pattern. This function tries to match one of these and will return a SSAT
4995// node if successful.
4996//
4997// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1
4998// is a power of 2.
5000 EVT VT = Op.getValueType();
5001 SDValue V1 = Op.getOperand(0);
5002 SDValue K1 = Op.getOperand(1);
5003 SDValue TrueVal1 = Op.getOperand(2);
5004 SDValue FalseVal1 = Op.getOperand(3);
5005 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5006
5007 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5008 if (Op2.getOpcode() != ISD::SELECT_CC)
5009 return SDValue();
5010
5011 SDValue V2 = Op2.getOperand(0);
5012 SDValue K2 = Op2.getOperand(1);
5013 SDValue TrueVal2 = Op2.getOperand(2);
5014 SDValue FalseVal2 = Op2.getOperand(3);
5015 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5016
5017 SDValue V1Tmp = V1;
5018 SDValue V2Tmp = V2;
5019
5020 // Check that the registers and the constants match a max(min()) or min(max())
5021 // pattern
5022 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5023 K2 != FalseVal2 ||
5024 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5025 return SDValue();
5026
5027 // Check that the constant in the lower-bound check is
5028 // the opposite of the constant in the upper-bound check
5029 // in 1's complement.
5031 return SDValue();
5032
5033 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5034 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5035 int64_t PosVal = std::max(Val1, Val2);
5036 int64_t NegVal = std::min(Val1, Val2);
5037
5038 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5039 !isPowerOf2_64(PosVal + 1))
5040 return SDValue();
5041
5042 // Handle the difference between USAT (unsigned) and SSAT (signed)
5043 // saturation
5044 // At this point, PosVal is guaranteed to be positive
5045 uint64_t K = PosVal;
5046 SDLoc dl(Op);
5047 if (Val1 == ~Val2)
5048 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5049 DAG.getConstant(llvm::countr_one(K), dl, VT));
5050 if (NegVal == 0)
5051 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5052 DAG.getConstant(llvm::countr_one(K), dl, VT));
5053
5054 return SDValue();
5055}
5056
5057// Check if a condition of the type x < k ? k : x can be converted into a
5058// bit operation instead of conditional moves.
5059// Currently this is allowed given:
5060// - The conditions and values match up
5061// - k is 0 or -1 (all ones)
5062// This function will not check the last condition, thats up to the caller
5063// It returns true if the transformation can be made, and in such case
5064// returns x in V, and k in SatK.
5066 SDValue &SatK)
5067{
5068 SDValue LHS = Op.getOperand(0);
5069 SDValue RHS = Op.getOperand(1);
5070 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5071 SDValue TrueVal = Op.getOperand(2);
5072 SDValue FalseVal = Op.getOperand(3);
5073
5075 ? &RHS
5076 : nullptr;
5077
5078 // No constant operation in comparison, early out
5079 if (!K)
5080 return false;
5081
5082 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5083 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5084 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5085
5086 // If the constant on left and right side, or variable on left and right,
5087 // does not match, early out
5088 if (*K != KTmp || V != VTmp)
5089 return false;
5090
5091 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5092 SatK = *K;
5093 return true;
5094 }
5095
5096 return false;
5097}
5098
5099bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5100 if (VT == MVT::f32)
5101 return !Subtarget->hasVFP2Base();
5102 if (VT == MVT::f64)
5103 return !Subtarget->hasFP64();
5104 if (VT == MVT::f16)
5105 return !Subtarget->hasFullFP16();
5106 return false;
5107}
5108
5109SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5110 EVT VT = Op.getValueType();
5111 SDLoc dl(Op);
5112
5113 // Try to convert two saturating conditional selects into a single SSAT
5114 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5115 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5116 return SatValue;
5117
5118 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5119 // into more efficient bit operations, which is possible when k is 0 or -1
5120 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5121 // single instructions. On Thumb the shift and the bit operation will be two
5122 // instructions.
5123 // Only allow this transformation on full-width (32-bit) operations
5124 SDValue LowerSatConstant;
5125 SDValue SatValue;
5126 if (VT == MVT::i32 &&
5127 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5128 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5129 DAG.getConstant(31, dl, VT));
5130 if (isNullConstant(LowerSatConstant)) {
5131 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5132 DAG.getAllOnesConstant(dl, VT));
5133 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5134 } else if (isAllOnesConstant(LowerSatConstant))
5135 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5136 }
5137
5138 SDValue LHS = Op.getOperand(0);
5139 SDValue RHS = Op.getOperand(1);
5140 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5141 SDValue TrueVal = Op.getOperand(2);
5142 SDValue FalseVal = Op.getOperand(3);
5143 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5144 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5145 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
5146 if (Op.getValueType().isInteger()) {
5147
5148 // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
5149 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
5150 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
5151 // Both require less instructions than compare and conditional select.
5152 if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC &&
5153 RHSC->isZero() && CFVal && CFVal->isZero() &&
5154 LHS.getValueType() == RHS.getValueType()) {
5155 EVT VT = LHS.getValueType();
5156 SDValue Shift =
5157 DAG.getNode(ISD::SRA, dl, VT, LHS,
5158 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
5159
5160 if (CC == ISD::SETGT)
5161 Shift = DAG.getNOT(dl, Shift, VT);
5162
5163 return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
5164 }
5165 }
5166
5167 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5168 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5169 unsigned TVal = CTVal->getZExtValue();
5170 unsigned FVal = CFVal->getZExtValue();
5171 unsigned Opcode = 0;
5172
5173 if (TVal == ~FVal) {
5174 Opcode = ARMISD::CSINV;
5175 } else if (TVal == ~FVal + 1) {
5176 Opcode = ARMISD::CSNEG;
5177 } else if (TVal + 1 == FVal) {
5178 Opcode = ARMISD::CSINC;
5179 } else if (TVal == FVal + 1) {
5180 Opcode = ARMISD::CSINC;
5181 std::swap(TrueVal, FalseVal);
5182 std::swap(TVal, FVal);
5183 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5184 }
5185
5186 if (Opcode) {
5187 // If one of the constants is cheaper than another, materialise the
5188 // cheaper one and let the csel generate the other.
5189 if (Opcode != ARMISD::CSINC &&
5190 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5191 std::swap(TrueVal, FalseVal);
5192 std::swap(TVal, FVal);
5193 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5194 }
5195
5196 // Attempt to use ZR checking TVal is 0, possibly inverting the condition
5197 // to get there. CSINC not is invertable like the other two (~(~a) == a,
5198 // -(-a) == a, but (a+1)+1 != a).
5199 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5200 std::swap(TrueVal, FalseVal);
5201 std::swap(TVal, FVal);
5202 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5203 }
5204
5205 // Drops F's value because we can get it by inverting/negating TVal.
5206 FalseVal = TrueVal;
5207
5208 SDValue ARMcc;
5209 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5210 EVT VT = TrueVal.getValueType();
5211 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5212 }
5213 }
5214
5215 if (isUnsupportedFloatingType(LHS.getValueType())) {
5216 softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5217
5218 // If softenSetCCOperands only returned one value, we should compare it to
5219 // zero.
5220 if (!RHS.getNode()) {
5221 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5222 CC = ISD::SETNE;
5223 }
5224 }
5225
5226 if (LHS.getValueType() == MVT::i32) {
5227 // Try to generate VSEL on ARMv8.
5228 // The VSEL instruction can't use all the usual ARM condition
5229 // codes: it only has two bits to select the condition code, so it's
5230 // constrained to use only GE, GT, VS and EQ.
5231 //
5232 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5233 // swap the operands of the previous compare instruction (effectively
5234 // inverting the compare condition, swapping 'less' and 'greater') and
5235 // sometimes need to swap the operands to the VSEL (which inverts the
5236 // condition in the sense of firing whenever the previous condition didn't)
5237 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5238 TrueVal.getValueType() == MVT::f32 ||
5239 TrueVal.getValueType() == MVT::f64)) {
5241 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5242 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5243 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5244 std::swap(TrueVal, FalseVal);
5245 }
5246 }
5247
5248 SDValue ARMcc;
5249 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5250 // Choose GE over PL, which vsel does now support
5251 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5252 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5253 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5254 }
5255
5256 ARMCC::CondCodes CondCode, CondCode2;
5257 FPCCToARMCC(CC, CondCode, CondCode2);
5258
5259 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5260 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5261 // must use VSEL (limited condition codes), due to not having conditional f16
5262 // moves.
5263 if (Subtarget->hasFPARMv8Base() &&
5264 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5265 (TrueVal.getValueType() == MVT::f16 ||
5266 TrueVal.getValueType() == MVT::f32 ||
5267 TrueVal.getValueType() == MVT::f64)) {
5268 bool swpCmpOps = false;
5269 bool swpVselOps = false;
5270 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5271
5272 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5273 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5274 if (swpCmpOps)
5275 std::swap(LHS, RHS);
5276 if (swpVselOps)
5277 std::swap(TrueVal, FalseVal);
5278 }
5279 }
5280
5281 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5282 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5283 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5284 if (CondCode2 != ARMCC::AL) {
5285 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5286 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
5287 }
5288 return Result;
5289}
5290
5291/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5292/// to morph to an integer compare sequence.
5293static bool canChangeToInt(SDValue Op, bool &SeenZero,
5294 const ARMSubtarget *Subtarget) {
5295 SDNode *N = Op.getNode();
5296 if (!N->hasOneUse())
5297 // Otherwise it requires moving the value from fp to integer registers.
5298 return false;
5299 if (!N->getNumValues())
5300 return false;
5301 EVT VT = Op.getValueType();
5302 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5303 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5304 // vmrs are very slow, e.g. cortex-a8.
5305 return false;
5306
5307 if (isFloatingPointZero(Op)) {
5308 SeenZero = true;
5309 return true;
5310 }
5311 return ISD::isNormalLoad(N);
5312}
5313
5316 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5317
5319 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5320 Ld->getPointerInfo(), Ld->getAlign(),
5321 Ld->getMemOperand()->getFlags());
5322
5323 llvm_unreachable("Unknown VFP cmp argument!");
5324}
5325
5327 SDValue &RetVal1, SDValue &RetVal2) {
5328 SDLoc dl(Op);
5329
5330 if (isFloatingPointZero(Op)) {
5331 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5332 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5333 return;
5334 }
5335
5336 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5337 SDValue Ptr = Ld->getBasePtr();
5338 RetVal1 =
5339 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5340 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5341
5342 EVT PtrType = Ptr.getValueType();
5343 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5344 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5345 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5346 Ld->getPointerInfo().getWithOffset(4),
5347 commonAlignment(Ld->getAlign(), 4),
5348 Ld->getMemOperand()->getFlags());
5349 return;
5350 }
5351
5352 llvm_unreachable("Unknown VFP cmp argument!");
5353}
5354
5355/// OptimizeVFPBrcond - With nnan and without daz, it's legal to optimize some
5356/// f32 and even f64 comparisons to integer ones.
5357SDValue
5358ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5359 SDValue Chain = Op.getOperand(0);
5360 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5361 SDValue LHS = Op.getOperand(2);
5362 SDValue RHS = Op.getOperand(3);
5363 SDValue Dest = Op.getOperand(4);
5364 SDLoc dl(Op);
5365
5366 bool LHSSeenZero = false;
5367 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5368 bool RHSSeenZero = false;
5369 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5370 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5371 // If unsafe fp math optimization is enabled and there are no other uses of
5372 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5373 // to an integer comparison.
5374 if (CC == ISD::SETOEQ)
5375 CC = ISD::SETEQ;
5376 else if (CC == ISD::SETUNE)
5377 CC = ISD::SETNE;
5378
5379 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5380 SDValue ARMcc;
5381 if (LHS.getValueType() == MVT::f32) {
5382 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5383 bitcastf32Toi32(LHS, DAG), Mask);
5384 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5385 bitcastf32Toi32(RHS, DAG), Mask);
5386 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5387 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5388 Cmp);
5389 }
5390
5391 SDValue LHS1, LHS2;
5392 SDValue RHS1, RHS2;
5393 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5394 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5395 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5396 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5398 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5399 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5400 return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
5401 }
5402
5403 return SDValue();
5404}
5405
5406// Generate CMP + CMOV for integer abs.
5407SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
5408 SDLoc DL(Op);
5409
5410 SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, MVT::i32);
5411
5412 // Generate CMP & CMOV.
5413 SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
5414 DAG.getConstant(0, DL, MVT::i32));
5415 return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
5416 DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
5417}
5418
5419SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5420 SDValue Chain = Op.getOperand(0);
5421 SDValue Cond = Op.getOperand(1);
5422 SDValue Dest = Op.getOperand(2);
5423 SDLoc dl(Op);
5424
5425 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5426 // instruction.
5427 unsigned Opc = Cond.getOpcode();
5428 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5429 !Subtarget->isThumb1Only();
5430 if (Cond.getResNo() == 1 &&
5431 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5432 Opc == ISD::USUBO || OptimizeMul)) {
5433 // Only lower legal XALUO ops.
5434 if (!isTypeLegal(Cond->getValueType(0)))
5435 return SDValue();
5436
5437 // The actual operation with overflow check.
5438 SDValue Value, OverflowCmp;
5439 SDValue ARMcc;
5440 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5441
5442 // Reverse the condition code.
5444 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5446 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5447
5448 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5449 OverflowCmp);
5450 }
5451
5452 return SDValue();
5453}
5454
5455SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5456 SDValue Chain = Op.getOperand(0);
5457 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5458 SDValue LHS = Op.getOperand(2);
5459 SDValue RHS = Op.getOperand(3);
5460 SDValue Dest = Op.getOperand(4);
5461 SDLoc dl(Op);
5462
5463 if (isUnsupportedFloatingType(LHS.getValueType())) {
5464 softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5465
5466 // If softenSetCCOperands only returned one value, we should compare it to
5467 // zero.
5468 if (!RHS.getNode()) {
5469 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5470 CC = ISD::SETNE;
5471 }
5472 }
5473
5474 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5475 // instruction.
5476 unsigned Opc = LHS.getOpcode();
5477 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5478 !Subtarget->isThumb1Only();
5479 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5480 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5481 Opc == ISD::USUBO || OptimizeMul) &&
5482 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5483 // Only lower legal XALUO ops.
5484 if (!isTypeLegal(LHS->getValueType(0)))
5485 return SDValue();
5486
5487 // The actual operation with overflow check.
5488 SDValue Value, OverflowCmp;
5489 SDValue ARMcc;
5490 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5491
5492 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5493 // Reverse the condition code.
5495 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5497 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5498 }
5499
5500 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5501 OverflowCmp);
5502 }
5503
5504 if (LHS.getValueType() == MVT::i32) {
5505 SDValue ARMcc;
5506 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5507 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
5508 }
5509
5510 SDNodeFlags Flags = Op->getFlags();
5511 if (Flags.hasNoNaNs() &&
5512 DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&
5513 DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE() &&
5514 (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE ||
5515 CC == ISD::SETUNE)) {
5516 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5517 return Result;
5518 }
5519
5520 ARMCC::CondCodes CondCode, CondCode2;
5521 FPCCToARMCC(CC, CondCode, CondCode2);
5522
5523 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5524 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5525 SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
5526 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5527 if (CondCode2 != ARMCC::AL) {
5528 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5529 SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
5530 Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5531 }
5532 return Res;
5533}
5534
5535SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5536 SDValue Chain = Op.getOperand(0);
5537 SDValue Table = Op.getOperand(1);
5538 SDValue Index = Op.getOperand(2);
5539 SDLoc dl(Op);
5540
5541 EVT PTy = getPointerTy(DAG.getDataLayout());
5542 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5543 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5544 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5545 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5546 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5547 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5548 // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
5549 // which does another jump to the destination. This also makes it easier
5550 // to translate it to TBB / TBH later (Thumb2 only).
5551 // FIXME: This might not work if the function is extremely large.
5552 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5553 Addr, Op.getOperand(2), JTI);
5554 }
5555 if (isPositionIndependent() || Subtarget->isROPI()) {
5556 Addr =
5557 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5559 Chain = Addr.getValue(1);
5560 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5561 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5562 } else {
5563 Addr =
5564 DAG.getLoad(PTy, dl, Chain, Addr,
5566 Chain = Addr.getValue(1);
5567 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5568 }
5569}
5570
5572 EVT VT = Op.getValueType();
5573 SDLoc dl(Op);
5574
5575 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5576 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5577 return Op;
5578 return DAG.UnrollVectorOp(Op.getNode());
5579 }
5580
5581 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5582
5583 EVT NewTy;
5584 const EVT OpTy = Op.getOperand(0).getValueType();
5585 if (OpTy == MVT::v4f32)
5586 NewTy = MVT::v4i32;
5587 else if (OpTy == MVT::v4f16 && HasFullFP16)
5588 NewTy = MVT::v4i16;
5589 else if (OpTy == MVT::v8f16 && HasFullFP16)
5590 NewTy = MVT::v8i16;
5591 else
5592 llvm_unreachable("Invalid type for custom lowering!");
5593
5594 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5595 return DAG.UnrollVectorOp(Op.getNode());
5596
5597 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5598 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5599}
5600
5601SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5602 EVT VT = Op.getValueType();
5603 if (VT.isVector())
5604 return LowerVectorFP_TO_INT(Op, DAG);
5605
5606 bool IsStrict = Op->isStrictFPOpcode();
5607 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5608
5609 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5610 RTLIB::Libcall LC;
5611 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5612 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5613 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5614 Op.getValueType());
5615 else
5616 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5617 Op.getValueType());
5618 SDLoc Loc(Op);
5619 MakeLibCallOptions CallOptions;
5620 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5622 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5623 CallOptions, Loc, Chain);
5624 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5625 }
5626
5627 // FIXME: Remove this when we have strict fp instruction selection patterns
5628 if (IsStrict) {
5629 SDLoc Loc(Op);
5630 SDValue Result =
5633 Loc, Op.getValueType(), SrcVal);
5634 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5635 }
5636
5637 return Op;
5638}
5639
5641 const ARMSubtarget *Subtarget) {
5642 EVT VT = Op.getValueType();
5643 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5644 EVT FromVT = Op.getOperand(0).getValueType();
5645
5646 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5647 return Op;
5648 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5649 Subtarget->hasFP64())
5650 return Op;
5651 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5652 Subtarget->hasFullFP16())
5653 return Op;
5654 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5655 Subtarget->hasMVEFloatOps())
5656 return Op;
5657 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5658 Subtarget->hasMVEFloatOps())
5659 return Op;
5660
5661 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5662 return SDValue();
5663
5664 SDLoc DL(Op);
5665 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5666 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5667 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5668 DAG.getValueType(VT.getScalarType()));
5669 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5670 DAG.getConstant((1 << BW) - 1, DL, VT));
5671 if (IsSigned)
5672 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5673 DAG.getSignedConstant(-(1 << BW), DL, VT));
5674 return Max;
5675}
5676
5678 EVT VT = Op.getValueType();
5679 SDLoc dl(Op);
5680
5681 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5682 if (VT.getVectorElementType() == MVT::f32)
5683 return Op;
5684 return DAG.UnrollVectorOp(Op.getNode());
5685 }
5686
5687 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5688 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5689 "Invalid type for custom lowering!");
5690
5691 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5692
5693 EVT DestVecType;
5694 if (VT == MVT::v4f32)
5695 DestVecType = MVT::v4i32;
5696 else if (VT == MVT::v4f16 && HasFullFP16)
5697 DestVecType = MVT::v4i16;
5698 else if (VT == MVT::v8f16 && HasFullFP16)
5699 DestVecType = MVT::v8i16;
5700 else
5701 return DAG.UnrollVectorOp(Op.getNode());
5702
5703 unsigned CastOpc;
5704 unsigned Opc;
5705 switch (Op.getOpcode()) {
5706 default: llvm_unreachable("Invalid opcode!");
5707 case ISD::SINT_TO_FP:
5708 CastOpc = ISD::SIGN_EXTEND;
5710 break;
5711 case ISD::UINT_TO_FP:
5712 CastOpc = ISD::ZERO_EXTEND;
5714 break;
5715 }
5716
5717 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5718 return DAG.getNode(Opc, dl, VT, Op);
5719}
5720
5721SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5722 EVT VT = Op.getValueType();
5723 if (VT.isVector())
5724 return LowerVectorINT_TO_FP(Op, DAG);
5725 if (isUnsupportedFloatingType(VT)) {
5726 RTLIB::Libcall LC;
5727 if (Op.getOpcode() == ISD::SINT_TO_FP)
5728 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5729 Op.getValueType());
5730 else
5731 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5732 Op.getValueType());
5733 MakeLibCallOptions CallOptions;
5734 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5735 CallOptions, SDLoc(Op)).first;
5736 }
5737
5738 return Op;
5739}
5740
5741SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5742 // Implement fcopysign with a fabs and a conditional fneg.
5743 SDValue Tmp0 = Op.getOperand(0);
5744 SDValue Tmp1 = Op.getOperand(1);
5745 SDLoc dl(Op);
5746 EVT VT = Op.getValueType();
5747 EVT SrcVT = Tmp1.getValueType();
5748 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5749 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5750 bool UseNEON = !InGPR && Subtarget->hasNEON();
5751
5752 if (UseNEON) {
5753 // Use VBSL to copy the sign bit.
5754 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5755 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5756 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5757 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5758 if (VT == MVT::f64)
5759 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5760 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5761 DAG.getConstant(32, dl, MVT::i32));
5762 else /*if (VT == MVT::f32)*/
5763 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5764 if (SrcVT == MVT::f32) {
5765 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5766 if (VT == MVT::f64)
5767 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5768 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5769 DAG.getConstant(32, dl, MVT::i32));
5770 } else if (VT == MVT::f32)
5771 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5772 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5773 DAG.getConstant(32, dl, MVT::i32));
5774 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5775 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5776
5778 dl, MVT::i32);
5779 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5780 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5781 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5782
5783 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5784 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5785 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5786 if (VT == MVT::f32) {
5787 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5788 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5789 DAG.getConstant(0, dl, MVT::i32));
5790 } else {
5791 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5792 }
5793
5794 return Res;
5795 }
5796
5797 // Bitcast operand 1 to i32.
5798 if (SrcVT == MVT::f64)
5799 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5800 Tmp1).getValue(1);
5801 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
5802
5803 // Or in the signbit with integer operations.
5804 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
5805 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5806 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
5807 if (VT == MVT::f32) {
5808 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
5809 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
5810 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5811 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
5812 }
5813
5814 // f64: Or the high part with signbit and then combine two parts.
5815 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5816 Tmp0);
5817 SDValue Lo = Tmp0.getValue(0);
5818 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
5819 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
5820 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
5821}
5822
5823SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
5824 MachineFunction &MF = DAG.getMachineFunction();
5825 MachineFrameInfo &MFI = MF.getFrameInfo();
5826 MFI.setReturnAddressIsTaken(true);
5827
5828 EVT VT = Op.getValueType();
5829 SDLoc dl(Op);
5830 unsigned Depth = Op.getConstantOperandVal(0);
5831 if (Depth) {
5832 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5833 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5834 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5835 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5836 MachinePointerInfo());
5837 }
5838
5839 // Return LR, which contains the return address. Mark it an implicit live-in.
5840 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5841 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5842}
5843
5844SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5845 const ARMBaseRegisterInfo &ARI =
5846 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5847 MachineFunction &MF = DAG.getMachineFunction();
5848 MachineFrameInfo &MFI = MF.getFrameInfo();
5849 MFI.setFrameAddressIsTaken(true);
5850
5851 EVT VT = Op.getValueType();
5852 SDLoc dl(Op); // FIXME probably not meaningful
5853 unsigned Depth = Op.getConstantOperandVal(0);
5854 Register FrameReg = ARI.getFrameRegister(MF);
5855 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5856 while (Depth--)
5857 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5858 MachinePointerInfo());
5859 return FrameAddr;
5860}
5861
5862// FIXME? Maybe this could be a TableGen attribute on some registers and
5863// this table could be generated automatically from RegInfo.
5864Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
5865 const MachineFunction &MF) const {
5866 return StringSwitch<Register>(RegName)
5867 .Case("sp", ARM::SP)
5868 .Default(Register());
5869}
5870
5871// Result is 64 bit value so split into two 32 bit values and return as a
5872// pair of values.
5874 SelectionDAG &DAG) {
5875 SDLoc DL(N);
5876
5877 // This function is only supposed to be called for i64 type destination.
5878 assert(N->getValueType(0) == MVT::i64
5879 && "ExpandREAD_REGISTER called for non-i64 type result.");
5880
5882 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5883 N->getOperand(0),
5884 N->getOperand(1));
5885
5886 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
5887 Read.getValue(1)));
5888 Results.push_back(Read.getValue(2)); // Chain
5889}
5890
5891/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
5892/// When \p DstVT, the destination type of \p BC, is on the vector
5893/// register bank and the source of bitcast, \p Op, operates on the same bank,
5894/// it might be possible to combine them, such that everything stays on the
5895/// vector register bank.
5896/// \p return The node that would replace \p BT, if the combine
5897/// is possible.
5899 SelectionDAG &DAG) {
5900 SDValue Op = BC->getOperand(0);
5901 EVT DstVT = BC->getValueType(0);
5902
5903 // The only vector instruction that can produce a scalar (remember,
5904 // since the bitcast was about to be turned into VMOVDRR, the source
5905 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
5906 // Moreover, we can do this combine only if there is one use.
5907 // Finally, if the destination type is not a vector, there is not
5908 // much point on forcing everything on the vector bank.
5909 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5910 !Op.hasOneUse())
5911 return SDValue();
5912
5913 // If the index is not constant, we will introduce an additional
5914 // multiply that will stick.
5915 // Give up in that case.
5916 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5917 if (!Index)
5918 return SDValue();
5919 unsigned DstNumElt = DstVT.getVectorNumElements();
5920
5921 // Compute the new index.
5922 const APInt &APIntIndex = Index->getAPIntValue();
5923 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
5924 NewIndex *= APIntIndex;
5925 // Check if the new constant index fits into i32.
5926 if (NewIndex.getBitWidth() > 32)
5927 return SDValue();
5928
5929 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
5930 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
5931 SDLoc dl(Op);
5932 SDValue ExtractSrc = Op.getOperand(0);
5933 EVT VecVT = EVT::getVectorVT(
5934 *DAG.getContext(), DstVT.getScalarType(),
5935 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
5936 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
5937 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
5938 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
5939}
5940
5941/// ExpandBITCAST - If the target supports VFP, this function is called to
5942/// expand a bit convert where either the source or destination type is i64 to
5943/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
5944/// operand type is illegal (e.g., v2f32 for a target that doesn't support
5945/// vectors), since the legalizer won't know what to do with that.
5946SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
5947 const ARMSubtarget *Subtarget) const {
5948 SDLoc dl(N);
5949 SDValue Op = N->getOperand(0);
5950
5951 // This function is only supposed to be called for i16 and i64 types, either
5952 // as the source or destination of the bit convert.
5953 EVT SrcVT = Op.getValueType();
5954 EVT DstVT = N->getValueType(0);
5955
5956 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
5957 (DstVT == MVT::f16 || DstVT == MVT::bf16))
5958 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
5959 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
5960
5961 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
5962 (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
5963 if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
5964 Op = DAG.getBitcast(MVT::f16, Op);
5965 return DAG.getNode(
5966 ISD::TRUNCATE, SDLoc(N), DstVT,
5967 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
5968 }
5969
5970 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
5971 return SDValue();
5972
5973 // Turn i64->f64 into VMOVDRR.
5974 if (SrcVT == MVT::i64 && isTypeLegal(DstVT)) {
5975 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
5976 // if we can combine the bitcast with its source.
5978 return Val;
5979 SDValue Lo, Hi;
5980 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
5981 return DAG.getNode(ISD::BITCAST, dl, DstVT,
5982 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
5983 }
5984
5985 // Turn f64->i64 into VMOVRRD.
5986 if (DstVT == MVT::i64 && isTypeLegal(SrcVT)) {
5987 SDValue Cvt;
5988 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
5989 SrcVT.getVectorNumElements() > 1)
5990 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5991 DAG.getVTList(MVT::i32, MVT::i32),
5992 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
5993 else
5994 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5995 DAG.getVTList(MVT::i32, MVT::i32), Op);
5996 // Merge the pieces into a single i64 value.
5997 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
5998 }
5999
6000 return SDValue();
6001}
6002
6003/// getZeroVector - Returns a vector of specified type with all zero elements.
6004/// Zero vectors are used to represent vector negation and in those cases
6005/// will be implemented with the NEON VNEG instruction. However, VNEG does
6006/// not support i64 elements, so sometimes the zero vectors will need to be
6007/// explicitly constructed. Regardless, use a canonical VMOV to create the
6008/// zero vector.
6009static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6010 assert(VT.isVector() && "Expected a vector type");
6011 // The canonical modified immediate encoding of a zero vector is....0!
6012 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6013 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6014 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6015 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6016}
6017
6018/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6019/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6020SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6021 SelectionDAG &DAG) const {
6022 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6023 EVT VT = Op.getValueType();
6024 unsigned VTBits = VT.getSizeInBits();
6025 SDLoc dl(Op);
6026 SDValue ShOpLo = Op.getOperand(0);
6027 SDValue ShOpHi = Op.getOperand(1);
6028 SDValue ShAmt = Op.getOperand(2);
6029 SDValue ARMcc;
6030 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6031
6032 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6033
6034 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6035 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6036 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6037 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6038 DAG.getConstant(VTBits, dl, MVT::i32));
6039 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6040 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6041 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6042 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6043 ISD::SETGE, ARMcc, DAG, dl);
6044 SDValue Lo =
6045 DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
6046
6047 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6048 SDValue HiBigShift = Opc == ISD::SRA
6049 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6050 DAG.getConstant(VTBits - 1, dl, VT))
6051 : DAG.getConstant(0, dl, VT);
6052 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6053 ISD::SETGE, ARMcc, DAG, dl);
6054 SDValue Hi =
6055 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6056
6057 SDValue Ops[2] = { Lo, Hi };
6058 return DAG.getMergeValues(Ops, dl);
6059}
6060
6061/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6062/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6063SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6064 SelectionDAG &DAG) const {
6065 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6066 EVT VT = Op.getValueType();
6067 unsigned VTBits = VT.getSizeInBits();
6068 SDLoc dl(Op);
6069 SDValue ShOpLo = Op.getOperand(0);
6070 SDValue ShOpHi = Op.getOperand(1);
6071 SDValue ShAmt = Op.getOperand(2);
6072 SDValue ARMcc;
6073
6074 assert(Op.getOpcode() == ISD::SHL_PARTS);
6075 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6076 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6077 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6078 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6079 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6080
6081 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6082 DAG.getConstant(VTBits, dl, MVT::i32));
6083 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6084 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6085 ISD::SETGE, ARMcc, DAG, dl);
6086 SDValue Hi =
6087 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6088
6089 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6090 ISD::SETGE, ARMcc, DAG, dl);
6091 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6092 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6093 DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
6094
6095 SDValue Ops[2] = { Lo, Hi };
6096 return DAG.getMergeValues(Ops, dl);
6097}
6098
6099SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6100 SelectionDAG &DAG) const {
6101 // The rounding mode is in bits 23:22 of the FPSCR.
6102 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6103 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
6104 // so that the shift + and get folded into a bitfield extract.
6105 SDLoc dl(Op);
6106 SDValue Chain = Op.getOperand(0);
6107 SDValue Ops[] = {Chain,
6108 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6109
6110 SDValue FPSCR =
6111 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6112 Chain = FPSCR.getValue(1);
6113 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6114 DAG.getConstant(1U << 22, dl, MVT::i32));
6115 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6116 DAG.getConstant(22, dl, MVT::i32));
6117 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6118 DAG.getConstant(3, dl, MVT::i32));
6119 return DAG.getMergeValues({And, Chain}, dl);
6120}
6121
6122SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6123 SelectionDAG &DAG) const {
6124 SDLoc DL(Op);
6125 SDValue Chain = Op->getOperand(0);
6126 SDValue RMValue = Op->getOperand(1);
6127
6128 // The rounding mode is in bits 23:22 of the FPSCR.
6129 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6130 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6131 // ((arg - 1) & 3) << 22).
6132 //
6133 // It is expected that the argument of llvm.set.rounding is within the
6134 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is
6135 // responsibility of the code generated llvm.set.rounding to ensure this
6136 // condition.
6137
6138 // Calculate new value of FPSCR[23:22].
6139 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6140 DAG.getConstant(1, DL, MVT::i32));
6141 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6142 DAG.getConstant(0x3, DL, MVT::i32));
6143 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6144 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6145
6146 // Get current value of FPSCR.
6147 SDValue Ops[] = {Chain,
6148 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6149 SDValue FPSCR =
6150 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6151 Chain = FPSCR.getValue(1);
6152 FPSCR = FPSCR.getValue(0);
6153
6154 // Put new rounding mode into FPSCR[23:22].
6155 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6156 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6157 DAG.getConstant(RMMask, DL, MVT::i32));
6158 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6159 SDValue Ops2[] = {
6160 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6161 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6162}
6163
6164SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6165 SelectionDAG &DAG) const {
6166 SDLoc DL(Op);
6167 SDValue Chain = Op->getOperand(0);
6168 SDValue Mode = Op->getOperand(1);
6169
6170 // Generate nodes to build:
6171 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6172 SDValue Ops[] = {Chain,
6173 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6174 SDValue FPSCR =
6175 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6176 Chain = FPSCR.getValue(1);
6177 FPSCR = FPSCR.getValue(0);
6178
6179 SDValue FPSCRMasked =
6180 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6181 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6182 SDValue InputMasked =
6183 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6184 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6185 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6186
6187 SDValue Ops2[] = {
6188 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6189 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6190}
6191
6192SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6193 SelectionDAG &DAG) const {
6194 SDLoc DL(Op);
6195 SDValue Chain = Op->getOperand(0);
6196
6197 // To get the default FP mode all control bits are cleared:
6198 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6199 SDValue Ops[] = {Chain,
6200 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6201 SDValue FPSCR =
6202 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6203 Chain = FPSCR.getValue(1);
6204 FPSCR = FPSCR.getValue(0);
6205
6206 SDValue FPSCRMasked = DAG.getNode(
6207 ISD::AND, DL, MVT::i32, FPSCR,
6209 SDValue Ops2[] = {Chain,
6210 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6211 FPSCRMasked};
6212 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6213}
6214
6216 const ARMSubtarget *ST) {
6217 SDLoc dl(N);
6218 EVT VT = N->getValueType(0);
6219 if (VT.isVector() && ST->hasNEON()) {
6220
6221 // Compute the least significant set bit: LSB = X & -X
6222 SDValue X = N->getOperand(0);
6223 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6224 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6225
6226 EVT ElemTy = VT.getVectorElementType();
6227
6228 if (ElemTy == MVT::i8) {
6229 // Compute with: cttz(x) = ctpop(lsb - 1)
6230 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6231 DAG.getTargetConstant(1, dl, ElemTy));
6232 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6233 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6234 }
6235
6236 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6237 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6238 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6239 unsigned NumBits = ElemTy.getSizeInBits();
6240 SDValue WidthMinus1 =
6241 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6242 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6243 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6244 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6245 }
6246
6247 // Compute with: cttz(x) = ctpop(lsb - 1)
6248
6249 // Compute LSB - 1.
6250 SDValue Bits;
6251 if (ElemTy == MVT::i64) {
6252 // Load constant 0xffff'ffff'ffff'ffff to register.
6253 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6254 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6255 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6256 } else {
6257 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6258 DAG.getTargetConstant(1, dl, ElemTy));
6259 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6260 }
6261 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6262 }
6263
6264 if (!ST->hasV6T2Ops())
6265 return SDValue();
6266
6267 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6268 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6269}
6270
6272 const ARMSubtarget *ST) {
6273 EVT VT = N->getValueType(0);
6274 SDLoc DL(N);
6275
6276 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6277 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6278 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6279 "Unexpected type for custom ctpop lowering");
6280
6281 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6282 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6283 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6284 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6285
6286 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6287 unsigned EltSize = 8;
6288 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6289 while (EltSize != VT.getScalarSizeInBits()) {
6291 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6292 TLI.getPointerTy(DAG.getDataLayout())));
6293 Ops.push_back(Res);
6294
6295 EltSize *= 2;
6296 NumElts /= 2;
6297 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6298 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6299 }
6300
6301 return Res;
6302}
6303
6304/// Getvshiftimm - Check if this is a valid build_vector for the immediate
6305/// operand of a vector shift operation, where all the elements of the
6306/// build_vector must have the same constant integer value.
6307static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6308 // Ignore bit_converts.
6309 while (Op.getOpcode() == ISD::BITCAST)
6310 Op = Op.getOperand(0);
6312 APInt SplatBits, SplatUndef;
6313 unsigned SplatBitSize;
6314 bool HasAnyUndefs;
6315 if (!BVN ||
6316 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6317 ElementBits) ||
6318 SplatBitSize > ElementBits)
6319 return false;
6320 Cnt = SplatBits.getSExtValue();
6321 return true;
6322}
6323
6324/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6325/// operand of a vector shift left operation. That value must be in the range:
6326/// 0 <= Value < ElementBits for a left shift; or
6327/// 0 <= Value <= ElementBits for a long left shift.
6328static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6329 assert(VT.isVector() && "vector shift count is not a vector type");
6330 int64_t ElementBits = VT.getScalarSizeInBits();
6331 if (!getVShiftImm(Op, ElementBits, Cnt))
6332 return false;
6333 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6334}
6335
6336/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6337/// operand of a vector shift right operation. For a shift opcode, the value
6338/// is positive, but for an intrinsic the value count must be negative. The
6339/// absolute value must be in the range:
6340/// 1 <= |Value| <= ElementBits for a right shift; or
6341/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6342static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6343 int64_t &Cnt) {
6344 assert(VT.isVector() && "vector shift count is not a vector type");
6345 int64_t ElementBits = VT.getScalarSizeInBits();
6346 if (!getVShiftImm(Op, ElementBits, Cnt))
6347 return false;
6348 if (!isIntrinsic)
6349 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6350 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6351 Cnt = -Cnt;
6352 return true;
6353 }
6354 return false;
6355}
6356
6358 const ARMSubtarget *ST) {
6359 EVT VT = N->getValueType(0);
6360 SDLoc dl(N);
6361 int64_t Cnt;
6362
6363 if (!VT.isVector())
6364 return SDValue();
6365
6366 // We essentially have two forms here. Shift by an immediate and shift by a
6367 // vector register (there are also shift by a gpr, but that is just handled
6368 // with a tablegen pattern). We cannot easily match shift by an immediate in
6369 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6370 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6371 // signed or unsigned, and a negative shift indicates a shift right).
6372 if (N->getOpcode() == ISD::SHL) {
6373 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6374 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6375 DAG.getConstant(Cnt, dl, MVT::i32));
6376 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6377 N->getOperand(1));
6378 }
6379
6380 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6381 "unexpected vector shift opcode");
6382
6383 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6384 unsigned VShiftOpc =
6385 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6386 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6387 DAG.getConstant(Cnt, dl, MVT::i32));
6388 }
6389
6390 // Other right shifts we don't have operations for (we use a shift left by a
6391 // negative number).
6392 EVT ShiftVT = N->getOperand(1).getValueType();
6393 SDValue NegatedCount = DAG.getNode(
6394 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6395 unsigned VShiftOpc =
6396 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6397 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6398}
6399
6401 const ARMSubtarget *ST) {
6402 EVT VT = N->getValueType(0);
6403 SDLoc dl(N);
6404
6405 // We can get here for a node like i32 = ISD::SHL i32, i64
6406 if (VT != MVT::i64)
6407 return SDValue();
6408
6409 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6410 N->getOpcode() == ISD::SHL) &&
6411 "Unknown shift to lower!");
6412
6413 unsigned ShOpc = N->getOpcode();
6414 if (ST->hasMVEIntegerOps()) {
6415 SDValue ShAmt = N->getOperand(1);
6416 unsigned ShPartsOpc = ARMISD::LSLL;
6418
6419 // If the shift amount is greater than 32 or has a greater bitwidth than 64
6420 // then do the default optimisation
6421 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6422 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6423 return SDValue();
6424
6425 // Extract the lower 32 bits of the shift amount if it's not an i32
6426 if (ShAmt->getValueType(0) != MVT::i32)
6427 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6428
6429 if (ShOpc == ISD::SRL) {
6430 if (!Con)
6431 // There is no t2LSRLr instruction so negate and perform an lsll if the
6432 // shift amount is in a register, emulating a right shift.
6433 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6434 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6435 else
6436 // Else generate an lsrl on the immediate shift amount
6437 ShPartsOpc = ARMISD::LSRL;
6438 } else if (ShOpc == ISD::SRA)
6439 ShPartsOpc = ARMISD::ASRL;
6440
6441 // Split Lower/Upper 32 bits of the destination/source
6442 SDValue Lo, Hi;
6443 std::tie(Lo, Hi) =
6444 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6445 // Generate the shift operation as computed above
6446 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6447 ShAmt);
6448 // The upper 32 bits come from the second return value of lsll
6449 Hi = SDValue(Lo.getNode(), 1);
6450 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6451 }
6452
6453 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6454 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6455 return SDValue();
6456
6457 // If we are in thumb mode, we don't have RRX.
6458 if (ST->isThumb1Only())
6459 return SDValue();
6460
6461 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6462 SDValue Lo, Hi;
6463 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6464
6465 // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
6466 // captures the shifted out bit into a carry flag.
6467 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6468 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
6469
6470 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6471 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6472
6473 // Merge the pieces into a single i64 value.
6474 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6475}
6476
6478 const ARMSubtarget *ST) {
6479 bool Invert = false;
6480 bool Swap = false;
6481 unsigned Opc = ARMCC::AL;
6482
6483 SDValue Op0 = Op.getOperand(0);
6484 SDValue Op1 = Op.getOperand(1);
6485 SDValue CC = Op.getOperand(2);
6486 EVT VT = Op.getValueType();
6487 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6488 SDLoc dl(Op);
6489
6490 EVT CmpVT;
6491 if (ST->hasNEON())
6493 else {
6494 assert(ST->hasMVEIntegerOps() &&
6495 "No hardware support for integer vector comparison!");
6496
6497 if (Op.getValueType().getVectorElementType() != MVT::i1)
6498 return SDValue();
6499
6500 // Make sure we expand floating point setcc to scalar if we do not have
6501 // mve.fp, so that we can handle them from there.
6502 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6503 return SDValue();
6504
6505 CmpVT = VT;
6506 }
6507
6508 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6509 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6510 // Special-case integer 64-bit equality comparisons. They aren't legal,
6511 // but they can be lowered with a few vector instructions.
6512 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6513 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6514 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6515 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6516 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6517 DAG.getCondCode(ISD::SETEQ));
6518 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6519 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6520 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6521 if (SetCCOpcode == ISD::SETNE)
6522 Merged = DAG.getNOT(dl, Merged, CmpVT);
6523 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6524 return Merged;
6525 }
6526
6527 if (CmpVT.getVectorElementType() == MVT::i64)
6528 // 64-bit comparisons are not legal in general.
6529 return SDValue();
6530
6531 if (Op1.getValueType().isFloatingPoint()) {
6532 switch (SetCCOpcode) {
6533 default: llvm_unreachable("Illegal FP comparison");
6534 case ISD::SETUNE:
6535 case ISD::SETNE:
6536 if (ST->hasMVEFloatOps()) {
6537 Opc = ARMCC::NE; break;
6538 } else {
6539 Invert = true; [[fallthrough]];
6540 }
6541 case ISD::SETOEQ:
6542 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6543 case ISD::SETOLT:
6544 case ISD::SETLT: Swap = true; [[fallthrough]];
6545 case ISD::SETOGT:
6546 case ISD::SETGT: Opc = ARMCC::GT; break;
6547 case ISD::SETOLE:
6548 case ISD::SETLE: Swap = true; [[fallthrough]];
6549 case ISD::SETOGE:
6550 case ISD::SETGE: Opc = ARMCC::GE; break;
6551 case ISD::SETUGE: Swap = true; [[fallthrough]];
6552 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6553 case ISD::SETUGT: Swap = true; [[fallthrough]];
6554 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6555 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6556 case ISD::SETONE: {
6557 // Expand this to (OLT | OGT).
6558 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6559 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6560 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6561 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6562 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6563 if (Invert)
6564 Result = DAG.getNOT(dl, Result, VT);
6565 return Result;
6566 }
6567 case ISD::SETUO: Invert = true; [[fallthrough]];
6568 case ISD::SETO: {
6569 // Expand this to (OLT | OGE).
6570 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6571 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6572 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6573 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6574 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6575 if (Invert)
6576 Result = DAG.getNOT(dl, Result, VT);
6577 return Result;
6578 }
6579 }
6580 } else {
6581 // Integer comparisons.
6582 switch (SetCCOpcode) {
6583 default: llvm_unreachable("Illegal integer comparison");
6584 case ISD::SETNE:
6585 if (ST->hasMVEIntegerOps()) {
6586 Opc = ARMCC::NE; break;
6587 } else {
6588 Invert = true; [[fallthrough]];
6589 }
6590 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6591 case ISD::SETLT: Swap = true; [[fallthrough]];
6592 case ISD::SETGT: Opc = ARMCC::GT; break;
6593 case ISD::SETLE: Swap = true; [[fallthrough]];
6594 case ISD::SETGE: Opc = ARMCC::GE; break;
6595 case ISD::SETULT: Swap = true; [[fallthrough]];
6596 case ISD::SETUGT: Opc = ARMCC::HI; break;
6597 case ISD::SETULE: Swap = true; [[fallthrough]];
6598 case ISD::SETUGE: Opc = ARMCC::HS; break;
6599 }
6600
6601 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6602 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6603 SDValue AndOp;
6605 AndOp = Op0;
6606 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6607 AndOp = Op1;
6608
6609 // Ignore bitconvert.
6610 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6611 AndOp = AndOp.getOperand(0);
6612
6613 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6614 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6615 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6616 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6617 if (!Invert)
6618 Result = DAG.getNOT(dl, Result, VT);
6619 return Result;
6620 }
6621 }
6622 }
6623
6624 if (Swap)
6625 std::swap(Op0, Op1);
6626
6627 // If one of the operands is a constant vector zero, attempt to fold the
6628 // comparison to a specialized compare-against-zero form.
6630 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6631 Opc == ARMCC::NE)) {
6632 if (Opc == ARMCC::GE)
6633 Opc = ARMCC::LE;
6634 else if (Opc == ARMCC::GT)
6635 Opc = ARMCC::LT;
6636 std::swap(Op0, Op1);
6637 }
6638
6639 SDValue Result;
6641 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6642 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6643 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6644 DAG.getConstant(Opc, dl, MVT::i32));
6645 else
6646 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6647 DAG.getConstant(Opc, dl, MVT::i32));
6648
6649 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6650
6651 if (Invert)
6652 Result = DAG.getNOT(dl, Result, VT);
6653
6654 return Result;
6655}
6656
6658 SDValue LHS = Op.getOperand(0);
6659 SDValue RHS = Op.getOperand(1);
6660 SDValue Carry = Op.getOperand(2);
6661 SDValue Cond = Op.getOperand(3);
6662 SDLoc DL(Op);
6663
6664 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6665
6666 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6667 // have to invert the carry first.
6668 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6669 DAG.getConstant(1, DL, MVT::i32), Carry);
6670 // This converts the boolean value carry into the carry flag.
6671 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6672
6673 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6674 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6675
6676 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6677 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6678 SDValue ARMcc = DAG.getConstant(
6679 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6680 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6681 Cmp.getValue(1));
6682}
6683
6684/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6685/// valid vector constant for a NEON or MVE instruction with a "modified
6686/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6687static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6688 unsigned SplatBitSize, SelectionDAG &DAG,
6689 const SDLoc &dl, EVT &VT, EVT VectorVT,
6690 VMOVModImmType type) {
6691 unsigned OpCmode, Imm;
6692 bool is128Bits = VectorVT.is128BitVector();
6693
6694 // SplatBitSize is set to the smallest size that splats the vector, so a
6695 // zero vector will always have SplatBitSize == 8. However, NEON modified
6696 // immediate instructions others than VMOV do not support the 8-bit encoding
6697 // of a zero vector, and the default encoding of zero is supposed to be the
6698 // 32-bit version.
6699 if (SplatBits == 0)
6700 SplatBitSize = 32;
6701
6702 switch (SplatBitSize) {
6703 case 8:
6704 if (type != VMOVModImm)
6705 return SDValue();
6706 // Any 1-byte value is OK. Op=0, Cmode=1110.
6707 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6708 OpCmode = 0xe;
6709 Imm = SplatBits;
6710 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6711 break;
6712
6713 case 16:
6714 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6715 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6716 if ((SplatBits & ~0xff) == 0) {
6717 // Value = 0x00nn: Op=x, Cmode=100x.
6718 OpCmode = 0x8;
6719 Imm = SplatBits;
6720 break;
6721 }
6722 if ((SplatBits & ~0xff00) == 0) {
6723 // Value = 0xnn00: Op=x, Cmode=101x.
6724 OpCmode = 0xa;
6725 Imm = SplatBits >> 8;
6726 break;
6727 }
6728 return SDValue();
6729
6730 case 32:
6731 // NEON's 32-bit VMOV supports splat values where:
6732 // * only one byte is nonzero, or
6733 // * the least significant byte is 0xff and the second byte is nonzero, or
6734 // * the least significant 2 bytes are 0xff and the third is nonzero.
6735 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6736 if ((SplatBits & ~0xff) == 0) {
6737 // Value = 0x000000nn: Op=x, Cmode=000x.
6738 OpCmode = 0;
6739 Imm = SplatBits;
6740 break;
6741 }
6742 if ((SplatBits & ~0xff00) == 0) {
6743 // Value = 0x0000nn00: Op=x, Cmode=001x.
6744 OpCmode = 0x2;
6745 Imm = SplatBits >> 8;
6746 break;
6747 }
6748 if ((SplatBits & ~0xff0000) == 0) {
6749 // Value = 0x00nn0000: Op=x, Cmode=010x.
6750 OpCmode = 0x4;
6751 Imm = SplatBits >> 16;
6752 break;
6753 }
6754 if ((SplatBits & ~0xff000000) == 0) {
6755 // Value = 0xnn000000: Op=x, Cmode=011x.
6756 OpCmode = 0x6;
6757 Imm = SplatBits >> 24;
6758 break;
6759 }
6760
6761 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6762 if (type == OtherModImm) return SDValue();
6763
6764 if ((SplatBits & ~0xffff) == 0 &&
6765 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6766 // Value = 0x0000nnff: Op=x, Cmode=1100.
6767 OpCmode = 0xc;
6768 Imm = SplatBits >> 8;
6769 break;
6770 }
6771
6772 // cmode == 0b1101 is not supported for MVE VMVN
6773 if (type == MVEVMVNModImm)
6774 return SDValue();
6775
6776 if ((SplatBits & ~0xffffff) == 0 &&
6777 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6778 // Value = 0x00nnffff: Op=x, Cmode=1101.
6779 OpCmode = 0xd;
6780 Imm = SplatBits >> 16;
6781 break;
6782 }
6783
6784 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
6785 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
6786 // VMOV.I32. A (very) minor optimization would be to replicate the value
6787 // and fall through here to test for a valid 64-bit splat. But, then the
6788 // caller would also need to check and handle the change in size.
6789 return SDValue();
6790
6791 case 64: {
6792 if (type != VMOVModImm)
6793 return SDValue();
6794 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
6795 uint64_t BitMask = 0xff;
6796 unsigned ImmMask = 1;
6797 Imm = 0;
6798 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
6799 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
6800 Imm |= ImmMask;
6801 } else if ((SplatBits & BitMask) != 0) {
6802 return SDValue();
6803 }
6804 BitMask <<= 8;
6805 ImmMask <<= 1;
6806 }
6807
6808 // Op=1, Cmode=1110.
6809 OpCmode = 0x1e;
6810 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
6811 break;
6812 }
6813
6814 default:
6815 llvm_unreachable("unexpected size for isVMOVModifiedImm");
6816 }
6817
6818 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
6819 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
6820}
6821
6822SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
6823 const ARMSubtarget *ST) const {
6824 EVT VT = Op.getValueType();
6825 bool IsDouble = (VT == MVT::f64);
6826 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
6827 const APFloat &FPVal = CFP->getValueAPF();
6828
6829 // Prevent floating-point constants from using literal loads
6830 // when execute-only is enabled.
6831 if (ST->genExecuteOnly()) {
6832 // We shouldn't trigger this for v6m execute-only
6833 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
6834 "Unexpected architecture");
6835
6836 // If we can represent the constant as an immediate, don't lower it
6837 if (isFPImmLegal(FPVal, VT))
6838 return Op;
6839 // Otherwise, construct as integer, and move to float register
6840 APInt INTVal = FPVal.bitcastToAPInt();
6841 SDLoc DL(CFP);
6842 switch (VT.getSimpleVT().SimpleTy) {
6843 default:
6844 llvm_unreachable("Unknown floating point type!");
6845 break;
6846 case MVT::f64: {
6847 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
6848 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
6849 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
6850 }
6851 case MVT::f32:
6852 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
6853 DAG.getConstant(INTVal, DL, MVT::i32));
6854 }
6855 }
6856
6857 if (!ST->hasVFP3Base())
6858 return SDValue();
6859
6860 // Use the default (constant pool) lowering for double constants when we have
6861 // an SP-only FPU
6862 if (IsDouble && !Subtarget->hasFP64())
6863 return SDValue();
6864
6865 // Try splatting with a VMOV.f32...
6866 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
6867
6868 if (ImmVal != -1) {
6869 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
6870 // We have code in place to select a valid ConstantFP already, no need to
6871 // do any mangling.
6872 return Op;
6873 }
6874
6875 // It's a float and we are trying to use NEON operations where
6876 // possible. Lower it to a splat followed by an extract.
6877 SDLoc DL(Op);
6878 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
6879 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
6880 NewVal);
6881 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
6882 DAG.getConstant(0, DL, MVT::i32));
6883 }
6884
6885 // The rest of our options are NEON only, make sure that's allowed before
6886 // proceeding..
6887 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
6888 return SDValue();
6889
6890 EVT VMovVT;
6891 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
6892
6893 // It wouldn't really be worth bothering for doubles except for one very
6894 // important value, which does happen to match: 0.0. So make sure we don't do
6895 // anything stupid.
6896 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
6897 return SDValue();
6898
6899 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
6900 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
6901 VMovVT, VT, VMOVModImm);
6902 if (NewVal != SDValue()) {
6903 SDLoc DL(Op);
6904 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
6905 NewVal);
6906 if (IsDouble)
6907 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6908
6909 // It's a float: cast and extract a vector element.
6910 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6911 VecConstant);
6912 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6913 DAG.getConstant(0, DL, MVT::i32));
6914 }
6915
6916 // Finally, try a VMVN.i32
6917 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
6918 VT, VMVNModImm);
6919 if (NewVal != SDValue()) {
6920 SDLoc DL(Op);
6921 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
6922
6923 if (IsDouble)
6924 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6925
6926 // It's a float: cast and extract a vector element.
6927 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6928 VecConstant);
6929 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6930 DAG.getConstant(0, DL, MVT::i32));
6931 }
6932
6933 return SDValue();
6934}
6935
6936// check if an VEXT instruction can handle the shuffle mask when the
6937// vector sources of the shuffle are the same.
6938static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
6939 unsigned NumElts = VT.getVectorNumElements();
6940
6941 // Assume that the first shuffle index is not UNDEF. Fail if it is.
6942 if (M[0] < 0)
6943 return false;
6944
6945 Imm = M[0];
6946
6947 // If this is a VEXT shuffle, the immediate value is the index of the first
6948 // element. The other shuffle indices must be the successive elements after
6949 // the first one.
6950 unsigned ExpectedElt = Imm;
6951 for (unsigned i = 1; i < NumElts; ++i) {
6952 // Increment the expected index. If it wraps around, just follow it
6953 // back to index zero and keep going.
6954 ++ExpectedElt;
6955 if (ExpectedElt == NumElts)
6956 ExpectedElt = 0;
6957
6958 if (M[i] < 0) continue; // ignore UNDEF indices
6959 if (ExpectedElt != static_cast<unsigned>(M[i]))
6960 return false;
6961 }
6962
6963 return true;
6964}
6965
6966static bool isVEXTMask(ArrayRef<int> M, EVT VT,
6967 bool &ReverseVEXT, unsigned &Imm) {
6968 unsigned NumElts = VT.getVectorNumElements();
6969 ReverseVEXT = false;
6970
6971 // Assume that the first shuffle index is not UNDEF. Fail if it is.
6972 if (M[0] < 0)
6973 return false;
6974
6975 Imm = M[0];
6976
6977 // If this is a VEXT shuffle, the immediate value is the index of the first
6978 // element. The other shuffle indices must be the successive elements after
6979 // the first one.
6980 unsigned ExpectedElt = Imm;
6981 for (unsigned i = 1; i < NumElts; ++i) {
6982 // Increment the expected index. If it wraps around, it may still be
6983 // a VEXT but the source vectors must be swapped.
6984 ExpectedElt += 1;
6985 if (ExpectedElt == NumElts * 2) {
6986 ExpectedElt = 0;
6987 ReverseVEXT = true;
6988 }
6989
6990 if (M[i] < 0) continue; // ignore UNDEF indices
6991 if (ExpectedElt != static_cast<unsigned>(M[i]))
6992 return false;
6993 }
6994
6995 // Adjust the index value if the source operands will be swapped.
6996 if (ReverseVEXT)
6997 Imm -= NumElts;
6998
6999 return true;
7000}
7001
7002static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7003 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7004 // range, then 0 is placed into the resulting vector. So pretty much any mask
7005 // of 8 elements can work here.
7006 return VT == MVT::v8i8 && M.size() == 8;
7007}
7008
7009static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7010 unsigned Index) {
7011 if (Mask.size() == Elements * 2)
7012 return Index / Elements;
7013 return Mask[Index] == 0 ? 0 : 1;
7014}
7015
7016// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7017// checking that pairs of elements in the shuffle mask represent the same index
7018// in each vector, incrementing the expected index by 2 at each step.
7019// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7020// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7021// v2={e,f,g,h}
7022// WhichResult gives the offset for each element in the mask based on which
7023// of the two results it belongs to.
7024//
7025// The transpose can be represented either as:
7026// result1 = shufflevector v1, v2, result1_shuffle_mask
7027// result2 = shufflevector v1, v2, result2_shuffle_mask
7028// where v1/v2 and the shuffle masks have the same number of elements
7029// (here WhichResult (see below) indicates which result is being checked)
7030//
7031// or as:
7032// results = shufflevector v1, v2, shuffle_mask
7033// where both results are returned in one vector and the shuffle mask has twice
7034// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7035// want to check the low half and high half of the shuffle mask as if it were
7036// the other case
7037static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7038 unsigned EltSz = VT.getScalarSizeInBits();
7039 if (EltSz == 64)
7040 return false;
7041
7042 unsigned NumElts = VT.getVectorNumElements();
7043 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7044 return false;
7045
7046 // If the mask is twice as long as the input vector then we need to check the
7047 // upper and lower parts of the mask with a matching value for WhichResult
7048 // FIXME: A mask with only even values will be rejected in case the first
7049 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7050 // M[0] is used to determine WhichResult
7051 for (unsigned i = 0; i < M.size(); i += NumElts) {
7052 WhichResult = SelectPairHalf(NumElts, M, i);
7053 for (unsigned j = 0; j < NumElts; j += 2) {
7054 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7055 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7056 return false;
7057 }
7058 }
7059
7060 if (M.size() == NumElts*2)
7061 WhichResult = 0;
7062
7063 return true;
7064}
7065
7066/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7067/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7068/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7069static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7070 unsigned EltSz = VT.getScalarSizeInBits();
7071 if (EltSz == 64)
7072 return false;
7073
7074 unsigned NumElts = VT.getVectorNumElements();
7075 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7076 return false;
7077
7078 for (unsigned i = 0; i < M.size(); i += NumElts) {
7079 WhichResult = SelectPairHalf(NumElts, M, i);
7080 for (unsigned j = 0; j < NumElts; j += 2) {
7081 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7082 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7083 return false;
7084 }
7085 }
7086
7087 if (M.size() == NumElts*2)
7088 WhichResult = 0;
7089
7090 return true;
7091}
7092
7093// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7094// that the mask elements are either all even and in steps of size 2 or all odd
7095// and in steps of size 2.
7096// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7097// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7098// v2={e,f,g,h}
7099// Requires similar checks to that of isVTRNMask with
7100// respect the how results are returned.
7101static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7102 unsigned EltSz = VT.getScalarSizeInBits();
7103 if (EltSz == 64)
7104 return false;
7105
7106 unsigned NumElts = VT.getVectorNumElements();
7107 if (M.size() != NumElts && M.size() != NumElts*2)
7108 return false;
7109
7110 for (unsigned i = 0; i < M.size(); i += NumElts) {
7111 WhichResult = SelectPairHalf(NumElts, M, i);
7112 for (unsigned j = 0; j < NumElts; ++j) {
7113 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7114 return false;
7115 }
7116 }
7117
7118 if (M.size() == NumElts*2)
7119 WhichResult = 0;
7120
7121 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7122 if (VT.is64BitVector() && EltSz == 32)
7123 return false;
7124
7125 return true;
7126}
7127
7128/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7129/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7130/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
7131static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7132 unsigned EltSz = VT.getScalarSizeInBits();
7133 if (EltSz == 64)
7134 return false;
7135
7136 unsigned NumElts = VT.getVectorNumElements();
7137 if (M.size() != NumElts && M.size() != NumElts*2)
7138 return false;
7139
7140 unsigned Half = NumElts / 2;
7141 for (unsigned i = 0; i < M.size(); i += NumElts) {
7142 WhichResult = SelectPairHalf(NumElts, M, i);
7143 for (unsigned j = 0; j < NumElts; j += Half) {
7144 unsigned Idx = WhichResult;
7145 for (unsigned k = 0; k < Half; ++k) {
7146 int MIdx = M[i + j + k];
7147 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7148 return false;
7149 Idx += 2;
7150 }
7151 }
7152 }
7153
7154 if (M.size() == NumElts*2)
7155 WhichResult = 0;
7156
7157 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7158 if (VT.is64BitVector() && EltSz == 32)
7159 return false;
7160
7161 return true;
7162}
7163
7164// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7165// that pairs of elements of the shufflemask represent the same index in each
7166// vector incrementing sequentially through the vectors.
7167// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7168// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7169// v2={e,f,g,h}
7170// Requires similar checks to that of isVTRNMask with respect the how results
7171// are returned.
7172static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7173 unsigned EltSz = VT.getScalarSizeInBits();
7174 if (EltSz == 64)
7175 return false;
7176
7177 unsigned NumElts = VT.getVectorNumElements();
7178 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7179 return false;
7180
7181 for (unsigned i = 0; i < M.size(); i += NumElts) {
7182 WhichResult = SelectPairHalf(NumElts, M, i);
7183 unsigned Idx = WhichResult * NumElts / 2;
7184 for (unsigned j = 0; j < NumElts; j += 2) {
7185 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7186 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7187 return false;
7188 Idx += 1;
7189 }
7190 }
7191
7192 if (M.size() == NumElts*2)
7193 WhichResult = 0;
7194
7195 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7196 if (VT.is64BitVector() && EltSz == 32)
7197 return false;
7198
7199 return true;
7200}
7201
7202/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7203/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7204/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7205static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7206 unsigned EltSz = VT.getScalarSizeInBits();
7207 if (EltSz == 64)
7208 return false;
7209
7210 unsigned NumElts = VT.getVectorNumElements();
7211 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7212 return false;
7213
7214 for (unsigned i = 0; i < M.size(); i += NumElts) {
7215 WhichResult = SelectPairHalf(NumElts, M, i);
7216 unsigned Idx = WhichResult * NumElts / 2;
7217 for (unsigned j = 0; j < NumElts; j += 2) {
7218 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7219 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7220 return false;
7221 Idx += 1;
7222 }
7223 }
7224
7225 if (M.size() == NumElts*2)
7226 WhichResult = 0;
7227
7228 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7229 if (VT.is64BitVector() && EltSz == 32)
7230 return false;
7231
7232 return true;
7233}
7234
7235/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7236/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7237static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7238 unsigned &WhichResult,
7239 bool &isV_UNDEF) {
7240 isV_UNDEF = false;
7241 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7242 return ARMISD::VTRN;
7243 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7244 return ARMISD::VUZP;
7245 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7246 return ARMISD::VZIP;
7247
7248 isV_UNDEF = true;
7249 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7250 return ARMISD::VTRN;
7251 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7252 return ARMISD::VUZP;
7253 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7254 return ARMISD::VZIP;
7255
7256 return 0;
7257}
7258
7259/// \return true if this is a reverse operation on an vector.
7260static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7261 unsigned NumElts = VT.getVectorNumElements();
7262 // Make sure the mask has the right size.
7263 if (NumElts != M.size())
7264 return false;
7265
7266 // Look for <15, ..., 3, -1, 1, 0>.
7267 for (unsigned i = 0; i != NumElts; ++i)
7268 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7269 return false;
7270
7271 return true;
7272}
7273
7274static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7275 unsigned NumElts = VT.getVectorNumElements();
7276 // Make sure the mask has the right size.
7277 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7278 return false;
7279
7280 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7281 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7282 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7283 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7284 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7285 int Ofs = Top ? 1 : 0;
7286 int Upper = SingleSource ? 0 : NumElts;
7287 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7288 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7289 return false;
7290 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7291 return false;
7292 }
7293 return true;
7294}
7295
7296static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7297 unsigned NumElts = VT.getVectorNumElements();
7298 // Make sure the mask has the right size.
7299 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7300 return false;
7301
7302 // If Top
7303 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7304 // This inserts Input2 into Input1
7305 // else if not Top
7306 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7307 // This inserts Input1 into Input2
7308 unsigned Offset = Top ? 0 : 1;
7309 unsigned N = SingleSource ? 0 : NumElts;
7310 for (unsigned i = 0; i < NumElts; i += 2) {
7311 if (M[i] >= 0 && M[i] != (int)i)
7312 return false;
7313 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7314 return false;
7315 }
7316
7317 return true;
7318}
7319
7320static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7321 unsigned NumElts = ToVT.getVectorNumElements();
7322 if (NumElts != M.size())
7323 return false;
7324
7325 // Test if the Trunc can be convertable to a VMOVN with this shuffle. We are
7326 // looking for patterns of:
7327 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7328 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7329
7330 unsigned Off0 = rev ? NumElts / 2 : 0;
7331 unsigned Off1 = rev ? 0 : NumElts / 2;
7332 for (unsigned i = 0; i < NumElts; i += 2) {
7333 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7334 return false;
7335 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7336 return false;
7337 }
7338
7339 return true;
7340}
7341
// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
// from a pair of inputs. For example:
// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
//             FP_ROUND(EXTRACT_ELT(Y, 0),
//             FP_ROUND(EXTRACT_ELT(X, 1),
//             FP_ROUND(EXTRACT_ELT(Y, 1), ...)
// Returns the two chained ARMISD::VCVTN nodes on success, or SDValue() to
// fall back to the default lowering.
                                  const ARMSubtarget *ST) {
  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
  // VCVTN is an MVE float instruction.
  if (!ST->hasMVEFloatOps())
    return SDValue();

  SDLoc dl(BV);
  EVT VT = BV.getValueType();
  // Only a v8f16 result (two v4f32 sources narrowed and interleaved) matches.
  if (VT != MVT::v8f16)
    return SDValue();

  // We are looking for a buildvector of fptrunc elements, where all the
  // elements are interleavingly extracted from two sources. Check the first two
  // items are valid enough and extract some info from them (they are checked
  // properly in the loop below).
  if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
    return SDValue();
  if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
    return SDValue();
  // The two interleaved source vectors.
  SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
  SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
  if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
    return SDValue();

  // Check all the values in the BuildVector line up with our expectations.
  for (unsigned i = 1; i < 4; i++) {
    // Element must be FP_ROUND(EXTRACT_ELT(Op, Idx)).
    auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
      return Trunc.getOpcode() == ISD::FP_ROUND &&
             Trunc.getOperand(0).getOperand(0) == Op &&
             Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
    };
    if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
      return SDValue();
    if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
      return SDValue();
  }

  // Emit two VCVTN nodes: the first narrows Op0 into the even lanes of an
  // undef vector, the second narrows Op1 into the odd lanes of that result.
  SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
                           DAG.getConstant(0, dl, MVT::i32));
  return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
                     DAG.getConstant(1, dl, MVT::i32));
}
7395
// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
// from a single input on alternating lanes. For example:
// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
//             FP_ROUND(EXTRACT_ELT(X, 2),
//             FP_ROUND(EXTRACT_ELT(X, 4), ...)
// Returns an ARMISD::VCVTL node on success, or SDValue() to fall back to the
// default lowering.
                                   const ARMSubtarget *ST) {
  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
  // VCVTL is an MVE float instruction.
  if (!ST->hasMVEFloatOps())
    return SDValue();

  SDLoc dl(BV);
  EVT VT = BV.getValueType();
  // Only a v4f32 result (widened from alternating v8f16 lanes) matches.
  if (VT != MVT::v4f32)
    return SDValue();

  // We are looking for a buildvector of fptext elements, where all the
  // elements are alternating lanes from a single source. For example <0,2,4,6>
  // or <1,3,5,7>. Check the first two items are valid enough and extract some
  // info from them (they are checked properly in the loop below).
  if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
    return SDValue();
  // The single source vector; Offset selects even (0) or odd (1) lanes.
  SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
  if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
    return SDValue();

  // Check all the values in the BuildVector line up with our expectations.
  for (unsigned i = 1; i < 4; i++) {
    // Element must be FP_EXTEND(EXTRACT_ELT(Op, Idx)).
    auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
      return Trunc.getOpcode() == ISD::FP_EXTEND &&
             Trunc.getOperand(0).getOperand(0) == Op &&
             Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
    };
    if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
      return SDValue();
  }

  return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
                     DAG.getConstant(Offset, dl, MVT::i32));
}
7439
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
                                    const ARMSubtarget *ST, const SDLoc &dl) {
  uint64_t Val;
  // Only plain integer constants qualify.
  if (!isa<ConstantSDNode>(N))
    return SDValue();
  Val = N->getAsZExtVal();

  if (ST->isThumb1Only()) {
    // Thumb1: a single MOV can materialize an 8-bit immediate; also accept
    // values whose bitwise complement fits in 8 bits (materialized via MVN).
    if (Val <= 255 || ~Val <= 255)
      return DAG.getConstant(Val, dl, MVT::i32);
  } else {
    // ARM/Thumb2: accept anything encodable as a modified immediate, either
    // directly (MOV) or as its complement (MVN).
    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
      return DAG.getConstant(Val, dl, MVT::i32);
  }
  return SDValue();
}
7459
// Lower a BUILD_VECTOR of i1 elements (an MVE predicate vector) by packing
// the known bits into an i32 and casting it to the predicate type, then
// inserting any non-constant lanes individually.
                                  const ARMSubtarget *ST) {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();

  assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");

  // Each boolean lane occupies 16/NumElts bits of the 16-bit predicate;
  // BoolMask is the all-ones pattern for one lane at that width.
  unsigned NumElts = VT.getVectorNumElements();
  unsigned BoolMask;
  unsigned BitsPerBool;
  if (NumElts == 2) {
    BitsPerBool = 8;
    BoolMask = 0xff;
  } else if (NumElts == 4) {
    BitsPerBool = 4;
    BoolMask = 0xf;
  } else if (NumElts == 8) {
    BitsPerBool = 2;
    BoolMask = 0x3;
  } else if (NumElts == 16) {
    BitsPerBool = 1;
    BoolMask = 0x1;
  } else
    return SDValue();

  // If this is a single value copied into all lanes (a splat), we can just sign
  // extend that single value
  SDValue FirstOp = Op.getOperand(0);
  if (!isa<ConstantSDNode>(FirstOp) &&
      llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
        return U.get().isUndef() || U.get() == FirstOp;
      })) {
    // Sign-extending the i1 yields 0 or all-ones, i.e. the splatted predicate.
    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
                              DAG.getValueType(MVT::i1));
    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
  }

  // First create base with bits set where known
  unsigned Bits32 = 0;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    // Skip non-constant lanes here; they are inserted dynamically below.
    if (!isa<ConstantSDNode>(V) && !V.isUndef())
      continue;
    // Undef lanes are treated as 0.
    bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
    if (BitSet)
      Bits32 |= BoolMask << (i * BitsPerBool);
  }

  // Add in unknown nodes
  SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
                             DAG.getConstant(Bits32, dl, MVT::i32));
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    if (isa<ConstantSDNode>(V) || V.isUndef())
      continue;
    Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
                       DAG.getConstant(i, dl, MVT::i32));
  }

  return Base;
}
7521
// Try to lower a BUILD_VECTOR whose elements form the arithmetic sequence
// Op[0], Op[0]+N, Op[0]+2N, ... into an MVE VIDUP node. Returns SDValue() if
// the pattern does not match.
                                            const ARMSubtarget *ST) {
  if (!ST->hasMVEIntegerOps())
    return SDValue();

  // We are looking for a buildvector where each element is Op[0] + i*N
  EVT VT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  // Get the increment value from operand 1
  SDValue Op1 = Op.getOperand(1);
  if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
    return SDValue();
  unsigned N = Op1.getConstantOperandVal(1);
  // VIDUP only supports these increment values.
  if (N != 1 && N != 2 && N != 4 && N != 8)
    return SDValue();

  // Check that each other operand matches
  for (unsigned I = 2; I < NumElts; I++) {
    SDValue OpI = Op.getOperand(I);
    if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
        OpI.getConstantOperandVal(1) != I * N)
      return SDValue();
  }

  // VIDUP produces the vector plus an updated scalar (hence the two-result
  // VTList); the base is Op0 and the immediate is the step N.
  SDLoc DL(Op);
  return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
                     DAG.getConstant(N, DL, MVT::i32));
}
7554
// Returns true if the operation N can be treated as qr instruction variant at
// operand Op.
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
  switch (N->getOpcode()) {
  // Commutative operations: either operand may take the scalar (qr) form.
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    return true;
  // Non-commutative operations: only the second operand may be the scalar.
  case ISD::SUB:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
    return N->getOperand(1).getNode() == Op;
    // Dispatch on the intrinsic ID (operand 0 of the intrinsic node).
    switch (N->getConstantOperandVal(0)) {
    // Commutative MVE intrinsics.
    case Intrinsic::arm_mve_add_predicated:
    case Intrinsic::arm_mve_mul_predicated:
    case Intrinsic::arm_mve_qadd_predicated:
    case Intrinsic::arm_mve_vhadd:
    case Intrinsic::arm_mve_hadd_predicated:
    case Intrinsic::arm_mve_vqdmulh:
    case Intrinsic::arm_mve_qdmulh_predicated:
    case Intrinsic::arm_mve_vqrdmulh:
    case Intrinsic::arm_mve_qrdmulh_predicated:
    case Intrinsic::arm_mve_vqdmull:
    case Intrinsic::arm_mve_vqdmull_predicated:
      return true;
    // Subtraction-like intrinsics: only the second data operand (operand 2,
    // after the intrinsic ID) may be the scalar.
    case Intrinsic::arm_mve_sub_predicated:
    case Intrinsic::arm_mve_qsub_predicated:
    case Intrinsic::arm_mve_vhsub:
    case Intrinsic::arm_mve_hsub_predicated:
      return N->getOperand(2).getNode() == Op;
    default:
      return false;
    }
  default:
    return false;
  }
}
7596
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
//
// Strategy, in order: i1 predicate vectors (MVE) -> VIDUP -> constant-splat
// immediates (VDUP/VMOV/VMVN/VMOVFPIMM) -> dominant-value VDUP(+inserts) ->
// shuffle reconstruction -> VCVT reconstruction -> 64-bit halves split ->
// ARMISD::BUILD_VECTOR for wide elements -> element-by-element inserts.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                             const ARMSubtarget *ST) const {
  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc dl(Op);
  EVT VT = Op.getValueType();

  // Predicate (i1-element) vectors have a dedicated MVE lowering.
  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
    return LowerBUILD_VECTOR_i1(Op, DAG, ST);

  // Try to match an incrementing sequence onto MVE VIDUP.
  if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
    return R;

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    // Every lane undef: nothing to materialize.
    if (SplatUndef.isAllOnes())
      return DAG.getUNDEF(VT);

    // If all the users of this constant splat are qr instruction variants,
    // generate a vdup of the constant.
    if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
        (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
        all_of(BVN->users(),
               [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
      EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
                  : SplatBitSize == 16 ? MVT::v8i16
                  : MVT::v16i8;
      SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
      SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
      return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
    }

    if ((ST->hasNEON() && SplatBitSize <= 64) ||
        (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
      // Check if an immediate VMOV works.
      EVT VmovVT;
      SDValue Val =
          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
                            SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);

      if (Val.getNode()) {
        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
      }

      // Try an immediate VMVN (the complement of the splat as an immediate).
      uint64_t NegatedImm = (~SplatBits).getZExtValue();
      Val = isVMOVModifiedImm(
          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
          VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
      if (Val.getNode()) {
        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
      }

      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
        int ImmVal = ARM_AM::getFP32Imm(SplatBits);
        if (ImmVal != -1) {
          SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
        }
      }

      // If we are under MVE, generate a VDUP(constant), bitcast to the original
      // type.
      if (ST->hasMVEIntegerOps() &&
          (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
        EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
                    : SplatBitSize == 16 ? MVT::v8i16
                    : MVT::v16i8;
        SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
        SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
      }
    }
  }

  // Scan through the operands to see if only one value is used.
  //
  // As an optimisation, even if more than one value is used it may be more
  // profitable to splat with one value then change some lanes.
  //
  // Heuristically we decide to do this if the vector has a "dominant" value,
  // defined as splatted to more than half of the lanes.
  unsigned NumElts = VT.getVectorNumElements();
  bool isOnlyLowElement = true;
  bool usesOnlyOneValue = true;
  bool hasDominantValue = false;
  bool isConstant = true;

  // Map of the number of times a particular SDValue appears in the
  // element list.
  DenseMap<SDValue, unsigned> ValueCounts;
  SDValue Value;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    // Undef lanes don't affect any of the heuristics.
    if (V.isUndef())
      continue;
    if (i > 0)
      isOnlyLowElement = false;
      isConstant = false;

    unsigned &Count = ValueCounts[V];

    // Is this value dominant? (takes up more than half of the lanes)
    if (++Count > (NumElts / 2)) {
      hasDominantValue = true;
      Value = V;
    }
  }
  if (ValueCounts.size() != 1)
    usesOnlyOneValue = false;
  // No dominant value found: fall back to an arbitrary defined value.
  if (!Value.getNode() && !ValueCounts.empty())
    Value = ValueCounts.begin()->first;

  // All lanes undef.
  if (ValueCounts.empty())
    return DAG.getUNDEF(VT);

  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
  // Keep going if we are hitting this case.
  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);

  unsigned EltSize = VT.getScalarSizeInBits();

  // Use VDUP for non-constant splats. For f32 constant splats, reduce to
  // i32 and try again.
  if (hasDominantValue && EltSize <= 32) {
    if (!isConstant) {
      SDValue N;

      // If we are VDUPing a value that comes directly from a vector, that will
      // cause an unnecessary move to and from a GPR, where instead we could
      // just use VDUPLANE. We can only do this if the lane being extracted
      // is at a constant index, as the VDUP from lane instructions only have
      // constant-index forms.
      ConstantSDNode *constIndex;
      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
        // We need to create a new undef vector to use for the VDUPLANE if the
        // size of the vector from which we get the value is different than the
        // size of the vector that we need to create. We will insert the element
        // such that the register coalescer will remove unnecessary copies.
        if (VT != Value->getOperand(0).getValueType()) {
          unsigned index = constIndex->getAPIntValue().getLimitedValue() %
          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
                          Value, DAG.getConstant(index, dl, MVT::i32)),
                  DAG.getConstant(index, dl, MVT::i32));
        } else
          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                          Value->getOperand(0), Value->getOperand(1));
      } else
        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);

      if (!usesOnlyOneValue) {
        // The dominant value was splatted as 'N', but we now have to insert
        // all differing elements.
        for (unsigned I = 0; I < NumElts; ++I) {
          if (Op.getOperand(I) == Value)
            continue;
          Ops.push_back(N);
          Ops.push_back(Op.getOperand(I));
          Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
        }
      }
      return N;
    }
      // Constant FP splat: bitcast the lanes to the same-width integer type
      // and retry the lowering, then bitcast the result back.
      MVT FVT = VT.getVectorElementType().getSimpleVT();
      assert(FVT == MVT::f32 || FVT == MVT::f16);
      MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
      for (unsigned i = 0; i < NumElts; ++i)
        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
                                  Op.getOperand(i)));
      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
      SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
      Val = LowerBUILD_VECTOR(Val, DAG, ST);
      if (Val.getNode())
        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
    if (usesOnlyOneValue) {
      // A constant splat that fits in one MOV/MVN can be VDUPed directly.
      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
      if (isConstant && Val.getNode())
        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
    }
  }

  // If all elements are constants and the case above didn't get hit, fall back
  // to the default expansion, which will generate a load from the constant
  // pool.
  if (isConstant)
    return SDValue();

  // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
  // vmovn). Empirical tests suggest this is rarely worth it for vectors of
  // length <= 2.
  if (NumElts >= 4)
    if (SDValue shuffle = ReconstructShuffle(Op, DAG))
      return shuffle;

  // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
  // VCVT's
  if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
    return VCVT;
  if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
    return VCVT;

  if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
    // If we haven't found an efficient lowering, try splitting a 128-bit vector
    // into two 64-bit vectors; we might discover a better way to lower it.
    SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
    EVT ExtVT = VT.getVectorElementType();
    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
    SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
    if (Lower.getOpcode() == ISD::BUILD_VECTOR)
      Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
    SDValue Upper =
        DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
    if (Upper.getOpcode() == ISD::BUILD_VECTOR)
      Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
    // Only concatenate if both halves found a better lowering.
    if (Lower && Upper)
      return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
  }

  // Vectors with 32- or 64-bit elements can be built by directly assigning
  // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
  // will be legalized.
  if (EltSize >= 32) {
    // Do the expansion with floating-point types, since that is what the VFP
    // registers are defined to use, and since i64 is not legal.
    EVT EltVT = EVT::getFloatingPointVT(EltSize);
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
    for (unsigned i = 0; i < NumElts; ++i)
      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
  }

  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
  // know the default expansion would otherwise fall back on something even
  // worse. For a vector with one or two non-undef values, that's
  // scalar_to_vector for the elements followed by a shuffle (provided the
  // shuffle is valid for the target) and materialization element by element
  // on the stack followed by a load for everything else.
  if (!isConstant && !usesOnlyOneValue) {
    SDValue Vec = DAG.getUNDEF(VT);
    for (unsigned i = 0 ; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      if (V.isUndef())
        continue;
      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
    }
    return Vec;
  }

  // Give up and let the generic expansion handle it.
  return SDValue();
}
7866
7867// Gather data to see if the operation can be modelled as a
7868// shuffle in combination with VEXTs.
7869SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
7870 SelectionDAG &DAG) const {
7871 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7872 SDLoc dl(Op);
7873 EVT VT = Op.getValueType();
7874 unsigned NumElts = VT.getVectorNumElements();
7875
7876 struct ShuffleSourceInfo {
7877 SDValue Vec;
7878 unsigned MinElt = std::numeric_limits<unsigned>::max();
7879 unsigned MaxElt = 0;
7880
7881 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
7882 // be compatible with the shuffle we intend to construct. As a result
7883 // ShuffleVec will be some sliding window into the original Vec.
7884 SDValue ShuffleVec;
7885
7886 // Code should guarantee that element i in Vec starts at element "WindowBase
7887 // + i * WindowScale in ShuffleVec".
7888 int WindowBase = 0;
7889 int WindowScale = 1;
7890
7891 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
7892
7893 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
7894 };
7895
7896 // First gather all vectors used as an immediate source for this BUILD_VECTOR
7897 // node.
7899 for (unsigned i = 0; i < NumElts; ++i) {
7900 SDValue V = Op.getOperand(i);
7901 if (V.isUndef())
7902 continue;
7903 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
7904 // A shuffle can only come from building a vector from various
7905 // elements of other vectors.
7906 return SDValue();
7907 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
7908 // Furthermore, shuffles require a constant mask, whereas extractelts
7909 // accept variable indices.
7910 return SDValue();
7911 }
7912
7913 // Add this element source to the list if it's not already there.
7914 SDValue SourceVec = V.getOperand(0);
7915 auto Source = llvm::find(Sources, SourceVec);
7916 if (Source == Sources.end())
7917 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
7918
7919 // Update the minimum and maximum lane number seen.
7920 unsigned EltNo = V.getConstantOperandVal(1);
7921 Source->MinElt = std::min(Source->MinElt, EltNo);
7922 Source->MaxElt = std::max(Source->MaxElt, EltNo);
7923 }
7924
7925 // Currently only do something sane when at most two source vectors
7926 // are involved.
7927 if (Sources.size() > 2)
7928 return SDValue();
7929
7930 // Find out the smallest element size among result and two sources, and use
7931 // it as element size to build the shuffle_vector.
7932 EVT SmallestEltTy = VT.getVectorElementType();
7933 for (auto &Source : Sources) {
7934 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
7935 if (SrcEltTy.bitsLT(SmallestEltTy))
7936 SmallestEltTy = SrcEltTy;
7937 }
7938 unsigned ResMultiplier =
7939 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
7940 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
7941 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
7942
7943 // If the source vector is too wide or too narrow, we may nevertheless be able
7944 // to construct a compatible shuffle either by concatenating it with UNDEF or
7945 // extracting a suitable range of elements.
7946 for (auto &Src : Sources) {
7947 EVT SrcVT = Src.ShuffleVec.getValueType();
7948
7949 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
7950 uint64_t VTSize = VT.getFixedSizeInBits();
7951 if (SrcVTSize == VTSize)
7952 continue;
7953
7954 // This stage of the search produces a source with the same element type as
7955 // the original, but with a total width matching the BUILD_VECTOR output.
7956 EVT EltVT = SrcVT.getVectorElementType();
7957 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
7958 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
7959
7960 if (SrcVTSize < VTSize) {
7961 if (2 * SrcVTSize != VTSize)
7962 return SDValue();
7963 // We can pad out the smaller vector for free, so if it's part of a
7964 // shuffle...
7965 Src.ShuffleVec =
7966 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
7967 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
7968 continue;
7969 }
7970
7971 if (SrcVTSize != 2 * VTSize)
7972 return SDValue();
7973
7974 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
7975 // Span too large for a VEXT to cope
7976 return SDValue();
7977 }
7978
7979 if (Src.MinElt >= NumSrcElts) {
7980 // The extraction can just take the second half
7981 Src.ShuffleVec =
7982 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7983 DAG.getConstant(NumSrcElts, dl, MVT::i32));
7984 Src.WindowBase = -NumSrcElts;
7985 } else if (Src.MaxElt < NumSrcElts) {
7986 // The extraction can just take the first half
7987 Src.ShuffleVec =
7988 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7989 DAG.getConstant(0, dl, MVT::i32));
7990 } else {
7991 // An actual VEXT is needed
7992 SDValue VEXTSrc1 =
7993 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7994 DAG.getConstant(0, dl, MVT::i32));
7995 SDValue VEXTSrc2 =
7996 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7997 DAG.getConstant(NumSrcElts, dl, MVT::i32));
7998
7999 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8000 VEXTSrc2,
8001 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8002 Src.WindowBase = -Src.MinElt;
8003 }
8004 }
8005
8006 // Another possible incompatibility occurs from the vector element types. We
8007 // can fix this by bitcasting the source vectors to the same type we intend
8008 // for the shuffle.
8009 for (auto &Src : Sources) {
8010 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8011 if (SrcEltTy == SmallestEltTy)
8012 continue;
8013 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8014 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8015 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8016 Src.WindowBase *= Src.WindowScale;
8017 }
8018
8019 // Final check before we try to actually produce a shuffle.
8020 LLVM_DEBUG({
8021 for (auto Src : Sources)
8022 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8023 });
8024
8025 // The stars all align, our next step is to produce the mask for the shuffle.
8026 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8027 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8028 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8029 SDValue Entry = Op.getOperand(i);
8030 if (Entry.isUndef())
8031 continue;
8032
8033 auto Src = llvm::find(Sources, Entry.getOperand(0));
8034 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8035
8036 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8037 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8038 // segment.
8039 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8040 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8041 VT.getScalarSizeInBits());
8042 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8043
8044 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8045 // starting at the appropriate offset.
8046 int *LaneMask = &Mask[i * ResMultiplier];
8047
8048 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8049 ExtractBase += NumElts * (Src - Sources.begin());
8050 for (int j = 0; j < LanesDefined; ++j)
8051 LaneMask[j] = ExtractBase + j;
8052 }
8053
8054
8055 // We can't handle more than two sources. This should have already
8056 // been checked before this point.
8057 assert(Sources.size() <= 2 && "Too many sources!");
8058
8059 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8060 for (unsigned i = 0; i < Sources.size(); ++i)
8061 ShuffleOps[i] = Sources[i].ShuffleVec;
8062
8063 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8064 ShuffleOps[1], Mask, DAG);
8065 if (!Shuffle)
8066 return SDValue();
8067 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8068}
8069
8071 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8080 OP_VUZPL, // VUZP, left result
8081 OP_VUZPR, // VUZP, right result
8082 OP_VZIPL, // VZIP, left result
8083 OP_VZIPR, // VZIP, right result
8084 OP_VTRNL, // VTRN, left result
8085 OP_VTRNR // VTRN, right result
8086};
8087
8088static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8089 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8090 switch (OpNum) {
8091 case OP_COPY:
8092 case OP_VREV:
8093 case OP_VDUP0:
8094 case OP_VDUP1:
8095 case OP_VDUP2:
8096 case OP_VDUP3:
8097 return true;
8098 }
8099 return false;
8100}
8101
8102/// isShuffleMaskLegal - Targets can use this to indicate that they only
8103/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8104/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8105/// are assumed to be legal.
  // 4-element shuffles (64- or 128-bit vectors) can be validated against the
  // perfect shuffle table: any entry costing at most 4 operations is legal on
  // NEON, and on MVE when the entry's op is one MVE can emit.
  if (VT.getVectorNumElements() == 4 &&
      (VT.is128BitVector() || VT.is64BitVector())) {
    unsigned PFIndexes[4];
    for (unsigned i = 0; i != 4; ++i) {
      if (M[i] < 0)
        PFIndexes[i] = 8; // 8 encodes an undef lane in the table index.
      else
        PFIndexes[i] = M[i];
    }

    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30); // Top two bits hold the op count.

    if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
      return true;
  }

  bool ReverseVEXT, isV_UNDEF;
  unsigned Imm, WhichResult;

  // Otherwise accept exactly the masks the shuffle lowering knows how to
  // select directly: wide (>=32-bit) elements, identity, the VREV variants,
  // and per-subtarget patterns (NEON VEXT/VTBL/two-result ops, whole-vector
  // reverses, MVE VMOVN and MVE truncating shuffles).
  unsigned EltSize = VT.getScalarSizeInBits();
  if (EltSize >= 32 ||
      ShuffleVectorInst::isIdentityMask(M, M.size()) ||
      isVREVMask(M, VT, 64) ||
      isVREVMask(M, VT, 32) ||
      isVREVMask(M, VT, 16))
    return true;
  else if (Subtarget->hasNEON() &&
           (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
            isVTBLMask(M, VT) ||
            isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
    return true;
  else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
           isReverseMask(M, VT))
    return true;
  else if (Subtarget->hasMVEIntegerOps() &&
           (isVMOVNMask(M, VT, true, false) ||
            isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
    return true;
  else if (Subtarget->hasMVEIntegerOps() &&
           (isTruncMask(M, VT, false, false) ||
            isTruncMask(M, VT, false, true) ||
            isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
    return true;
  else
    return false;
}
8158
8159/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8160/// the specified operations to build the shuffle.
                                       SDValue RHS, SelectionDAG &DAG,
                                       const SDLoc &dl) {
  // Decode the table entry: op kind in bits [29:26], then two 13-bit
  // sub-entries describing how to build the left and right operands.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  if (OpNum == OP_COPY) {
    // Index 0123 (base-9 encoded) means "LHS unchanged"; 4567 means RHS.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  // Recursively materialise both operands before applying this entry's op.
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
  EVT VT = OpLHS.getValueType();

  switch (OpNum) {
  default: llvm_unreachable("Unknown shuffle opcode!");
  case OP_VREV:
    // VREV divides the vector in half and swaps within the half.
    if (VT.getScalarSizeInBits() == 32)
      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
    // vrev <4 x i16> -> VREV32
    if (VT.getScalarSizeInBits() == 16)
      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
    // vrev <4 x i8> -> VREV16
    assert(VT.getScalarSizeInBits() == 8);
    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
  case OP_VDUP0:
  case OP_VDUP1:
  case OP_VDUP2:
  case OP_VDUP3:
    // The duplicated lane index is encoded in the opcode itself.
    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
  case OP_VEXT1:
  case OP_VEXT2:
  case OP_VEXT3:
    return DAG.getNode(ARMISD::VEXT, dl, VT,
                       OpLHS, OpRHS,
                       DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
  case OP_VUZPL:
  case OP_VUZPR:
    // Two-result nodes: select the requested half via getValue().
    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
  case OP_VZIPL:
  case OP_VZIPR:
    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
  case OP_VTRNL:
  case OP_VTRNR:
    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
  }
}
8217
                                       ArrayRef<int> ShuffleMask,
                                       SelectionDAG &DAG) {
  // Check to see if we can use the VTBL instruction.
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);

  // Build the byte-index vector the table-lookup instruction consumes.
  // Negative (undef) mask entries are emitted as signed constants; VTBL
  // produces 0 for out-of-range indices, which is fine for undef lanes.
  SmallVector<SDValue, 8> VTBLMask;
  for (int I : ShuffleMask)
    VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));

  // One-register table when the second operand is undef, else VTBL2.
  if (V2.getNode()->isUndef())
    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
                       DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));

  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
                     DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
8237
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  // NOTE(review): the assert message omits v8f16, which the condition also
  // accepts — looks like the message predates f16 support; confirm.
  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
         "Expect an v8i16/v16i8 type");
  SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
  // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
  // extract the first 8 bytes into the top double word and the last 8 bytes
  // into the bottom double word, through a new vector shuffle that will be
  // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
  std::vector<int> NewMask;
  // Mask <N/2, ..., N-1, 0, ..., N/2-1> swaps the two double words.
  for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
    NewMask.push_back(VT.getVectorNumElements() / 2 + i);
  for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
    NewMask.push_back(i);
  return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
}
8256
  // Map each MVE predicate (i1 vector) type to the full-width vector type
  // whose lanes the predicate bits control.
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v2i1:
    return MVT::v2f64;
  case MVT::v4i1:
    return MVT::v4i32;
  case MVT::v8i1:
    return MVT::v8i16;
  case MVT::v16i1:
    return MVT::v16i8;
  default:
    llvm_unreachable("Unexpected vector predicate type");
  }
}
8271
                                    SelectionDAG &DAG) {
  // Converting from boolean predicates to integers involves creating a vector
  // of all ones or all zeroes and selecting the lanes based upon the real
  // predicate.
      DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);

  // A VMOV immediate splatting 0x00 into every byte, built like AllOnes.
  SDValue AllZeroes =
      DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
  AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);

  // Get full vector type from predicate type

  SDValue RecastV1;
  // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
  // this to a v16i1. This cannot be done with an ordinary bitcast because the
  // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
  // since we know in hardware the sizes are really the same.
  if (VT != MVT::v16i1)
    RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
  else
    RecastV1 = Pred;

  // Select either all ones or zeroes depending upon the real predicate bits.
  SDValue PredAsVector =
      DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);

  // Recast our new predicate-as-integer v16i8 vector into something
  // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
  return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
}
8306
                                          const ARMSubtarget *ST) {
  EVT VT = Op.getValueType();
  ArrayRef<int> ShuffleMask = SVN->getMask();

  assert(ST->hasMVEIntegerOps() &&
         "No support for vector shuffle of boolean predicates");

  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc dl(Op);
  // A full reverse can be done entirely in the scalar unit: move the
  // predicate mask into a GPR, bit-reverse it, and shift the used bits back
  // down from the top half of the register.
  if (isReverseMask(ShuffleMask, VT)) {
    SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
    SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
    SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
                              DAG.getConstant(16, dl, MVT::i32));
    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
  }

  // Until we can come up with optimised cases for every single vector
  // shuffle in existence we have chosen the least painful strategy. This is
  // to essentially promote the boolean predicate to a 8-bit integer, where
  // each predicate represents a byte. Then we fall back on a normal integer
  // vector shuffle and convert the result back into a predicate vector. In
  // many cases the generated code might be even better than scalar code
  // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
  // fields in a register into 8 other arbitrary 2-bit fields!
  SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
  EVT NewVT = PredAsVector1.getValueType();
  SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
                                       : PromoteMVEPredVector(dl, V2, VT, DAG);
  assert(PredAsVector2.getValueType() == NewVT &&
         "Expected identical vector type in expanded i1 shuffle!");

  // Do the shuffle!
  SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
                                          PredAsVector2, ShuffleMask);

  // Now return the result of comparing the shuffled vector with zero,
  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
  // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
  if (VT == MVT::v2i1) {
    SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
    SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
                              DAG.getConstant(ARMCC::NE, dl, MVT::i32));
    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
  }
  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
                     DAG.getConstant(ARMCC::NE, dl, MVT::i32));
}
8358
                                              ArrayRef<int> ShuffleMask,
                                              SelectionDAG &DAG) {
  // Attempt to lower the vector shuffle using as many whole register movs as
  // possible. This is useful for types smaller than 32bits, which would
  // often otherwise become a series for grp movs.
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  if (VT.getScalarSizeInBits() >= 32)
    return SDValue();

  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
         "Unexpected vector type");
  int NumElts = VT.getVectorNumElements();
  int QuarterSize = NumElts / 4;
  // The four final parts of the vector, as i32's
  SDValue Parts[4];

  // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
  // <u,u,u,u>), returning the vmov lane index
  auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
    // Detect which mov lane this would be from the first non-undef element.
    int MovIdx = -1;
    for (int i = 0; i < Length; i++) {
      if (ShuffleMask[Start + i] >= 0) {
        if (ShuffleMask[Start + i] % Length != i)
          return -1;
        MovIdx = ShuffleMask[Start + i] / Length;
        break;
      }
    }
    // If all items are undef, leave this for other combines
    if (MovIdx == -1)
      return -1;
    // Check the remaining values are the correct part of the same mov
    for (int i = 1; i < Length; i++) {
      if (ShuffleMask[Start + i] >= 0 &&
          (ShuffleMask[Start + i] / Length != MovIdx ||
           ShuffleMask[Start + i] % Length != i))
        return -1;
    }
    return MovIdx;
  };

  for (int Part = 0; Part < 4; ++Part) {
    // Does this part look like a mov
    int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
    if (Elt != -1) {
      SDValue Input = Op->getOperand(0);
      if (Elt >= 4) {
        // Lane indices 4..7 refer to the second shuffle operand.
        Input = Op->getOperand(1);
        Elt -= 4;
      }
      SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
      Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
                                DAG.getConstant(Elt, dl, MVT::i32));
    }
  }

  // Nothing interesting found, just return
  if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
    return SDValue();

  // The other parts need to be built with the old shuffle vector, cast to a
  // v4i32 and extract_vector_elts
  if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
    SmallVector<int, 16> NewShuffleMask;
    // Lanes already covered by a whole-register mov are marked undef in the
    // residual shuffle so the remaining work is as small as possible.
    for (int Part = 0; Part < 4; ++Part)
      for (int i = 0; i < QuarterSize; i++)
        NewShuffleMask.push_back(
            Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
    SDValue NewShuffle = DAG.getVectorShuffle(
        VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
    SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);

    for (int Part = 0; Part < 4; ++Part)
      if (!Parts[Part])
        Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
                                  BitCast, DAG.getConstant(Part, dl, MVT::i32));
  }
  // Build a vector out of the various parts and bitcast it back to the original
  // type.
  SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
  return DAG.getBitcast(VT, NewVec);
}
8444
                                                ArrayRef<int> ShuffleMask,
                                                SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  unsigned NumElts = VT.getVectorNumElements();

  // An One-Off Identity mask is one that is mostly an identity mask from as
  // single source but contains a single element out-of-place, either from a
  // different vector or from another position in the same vector. As opposed to
  // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
  // pair directly.
  auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
                                 int &OffElement) {
    OffElement = -1;
    int NonUndef = 0;
    for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
      if (Mask[i] == -1)
        continue;
      NonUndef++;
      if (Mask[i] != i + BaseOffset) {
        if (OffElement == -1)
          OffElement = i;
        else
          return false; // A second out-of-place lane disqualifies the mask.
      }
    }
    // Require at least three defined lanes so the extract/insert pair wins
    // over other lowerings, and exactly one displaced lane.
    return NonUndef > 2 && OffElement != -1;
  };
  int OffElement;
  SDValue VInput;
  // Try the identity against V1 (offset 0) then V2 (offset NumElts).
  if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
    VInput = V1;
  else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
    VInput = V2;
  else
    return SDValue();

  SDLoc dl(Op);
  // i8/i16 lanes are moved through an i32 scalar.
  EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
                ? MVT::i32
                : VT.getScalarType();
  SDValue Elt = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, dl, SVT,
      ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
      DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
                     DAG.getVectorIdxConstant(OffElement % NumElts, dl));
}
8495
                                   const ARMSubtarget *ST) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned EltSize = VT.getScalarSizeInBits();

  // Boolean predicate shuffles take a dedicated path on MVE.
  if (ST->hasMVEIntegerOps() && EltSize == 1)
    return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);

  // Convert shuffles that are directly supported on NEON to target-specific
  // DAG nodes, instead of keeping them as shuffles and matching them again
  // during code selection. This is more efficient and avoids the possibility
  // of inconsistencies between legalization and selection.
  // FIXME: floating-point vectors should be canonicalized to integer vectors
  // of the same time so that they get CSEd properly.
  ArrayRef<int> ShuffleMask = SVN->getMask();

  if (EltSize <= 32) {
    if (SVN->isSplat()) {
      int Lane = SVN->getSplatIndex();
      // If this is undef splat, generate it via "just" vdup, if possible.
      if (Lane == -1) Lane = 0;

      // Test if V1 is a SCALAR_TO_VECTOR.
      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
      }
      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
      // reaches it).
      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
        bool IsScalarToVector = true;
        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
          if (!V1.getOperand(i).isUndef()) {
            IsScalarToVector = false;
            break;
          }
        if (IsScalarToVector)
          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
      }
      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
                         DAG.getConstant(Lane, dl, MVT::i32));
    }

    bool ReverseVEXT = false;
    unsigned Imm = 0;
    if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
      if (ReverseVEXT)
        std::swap(V1, V2);
      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
                         DAG.getConstant(Imm, dl, MVT::i32));
    }

    if (isVREVMask(ShuffleMask, VT, 64))
      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
    if (isVREVMask(ShuffleMask, VT, 32))
      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
    if (isVREVMask(ShuffleMask, VT, 16))
      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);

    if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
                         DAG.getConstant(Imm, dl, MVT::i32));
    }

    // Check for Neon shuffles that modify both input vectors in place.
    // If both results are used, i.e., if there are two shuffles with the same
    // source operands and with masks corresponding to both results of one of
    // these operations, DAG memoization will ensure that a single node is
    // used for both shuffles.
    unsigned WhichResult = 0;
    bool isV_UNDEF = false;
    if (ST->hasNEON()) {
      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
              ShuffleMask, VT, WhichResult, isV_UNDEF)) {
        if (isV_UNDEF)
          V2 = V1;
        return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
            .getValue(WhichResult);
      }
    }
    if (ST->hasMVEIntegerOps()) {
      if (isVMOVNMask(ShuffleMask, VT, false, false))
        return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
                           DAG.getConstant(0, dl, MVT::i32));
      if (isVMOVNMask(ShuffleMask, VT, true, false))
        return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
                           DAG.getConstant(1, dl, MVT::i32));
      if (isVMOVNMask(ShuffleMask, VT, true, true))
        return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
                           DAG.getConstant(1, dl, MVT::i32));
    }

    // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
    // shuffles that produce a result larger than their operands with:
    //   shuffle(concat(v1, undef), concat(v2, undef))
    // ->
    //   shuffle(concat(v1, v2), undef)
    // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
    //
    // This is useful in the general case, but there are special cases where
    // native shuffles produce larger results: the two-result ops.
    //
    // Look through the concat when lowering them:
    //   shuffle(concat(v1, v2), undef)
    // ->
    //   concat(VZIP(v1, v2):0, :1)
    //
    if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
      SDValue SubV1 = V1->getOperand(0);
      SDValue SubV2 = V1->getOperand(1);
      EVT SubVT = SubV1.getValueType();

      // We expect these to have been canonicalized to -1.
      assert(llvm::all_of(ShuffleMask, [&](int i) {
        return i < (int)VT.getVectorNumElements();
      }) && "Unexpected shuffle index into UNDEF operand!");

      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
              ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
        if (isV_UNDEF)
          SubV2 = SubV1;
        assert((WhichResult == 0) &&
               "In-place shuffle of concat can only have one result!");
        SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
                                  SubV1, SubV2);
        return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
                           Res.getValue(1));
      }
    }
  }

  if (ST->hasMVEIntegerOps() && EltSize <= 32) {
    if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
      return V;

    // Try all four truncation variants (top/bottom half, one/two sources).
    for (bool Top : {false, true}) {
      for (bool SingleSource : {false, true}) {
        if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
          MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
          MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
          SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
          SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
                                   SingleSource ? V1 : V2);
          if (Top) {
            SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
            Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
            Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
          }
          return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
        }
      }
    }
  }

  // If the shuffle is not directly supported and it has 4 elements, use
  // the PerfectShuffle-generated table to synthesize it from other shuffles.
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 4) {
    unsigned PFIndexes[4];
    for (unsigned i = 0; i != 4; ++i) {
      if (ShuffleMask[i] < 0)
        PFIndexes[i] = 8;
      else
        PFIndexes[i] = ShuffleMask[i];
    }

    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    if (Cost <= 4) {
      if (ST->hasNEON())
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
      else if (isLegalMVEShuffleOp(PFEntry)) {
        // On MVE both recursive sub-entries must also be MVE-legal.
        unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
        unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
        unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
        unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
        if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
          return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
      }
    }
  }

  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
  if (EltSize >= 32) {
    // Do the expansion with floating-point types, since that is what the VFP
    // registers are defined to use, and since i64 is not legal.
    EVT EltVT = EVT::getFloatingPointVT(EltSize);
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
    for (unsigned i = 0; i < NumElts; ++i) {
      if (ShuffleMask[i] < 0)
        Ops.push_back(DAG.getUNDEF(EltVT));
      else
        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
                                                  dl, MVT::i32)));
    }
    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
  }

  if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
      isReverseMask(ShuffleMask, VT))
    return LowerReverse_VECTOR_SHUFFLE(Op, DAG);

  if (ST->hasNEON() && VT == MVT::v8i8)
    if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
      return NewOp;

  if (ST->hasMVEIntegerOps())
    if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
      return NewOp;

  return SDValue();
}
8723
                                          const ARMSubtarget *ST) {
  EVT VecVT = Op.getOperand(0).getValueType();
  SDLoc dl(Op);

  assert(ST->hasMVEIntegerOps() &&
         "LowerINSERT_VECTOR_ELT_i1 called without MVE!");

  // Move the whole predicate into a GPR, splice the new lane's bit(s) in
  // with a BFI, then cast back to the predicate type.
  SDValue Conv =
      DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
  unsigned Lane = Op.getConstantOperandVal(2);
  unsigned LaneWidth =
  unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
  // Sign-extend the inserted i1 so the BFI writes all of the lane's bits.
  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
                            Op.getOperand(1), DAG.getValueType(MVT::i1));
  SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
                            DAG.getConstant(~Mask, dl, MVT::i32));
  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
}
8744
8745SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8746 SelectionDAG &DAG) const {
8747 // INSERT_VECTOR_ELT is legal only for immediate indexes.
8748 SDValue Lane = Op.getOperand(2);
8749 if (!isa<ConstantSDNode>(Lane))
8750 return SDValue();
8751
8752 SDValue Elt = Op.getOperand(1);
8753 EVT EltVT = Elt.getValueType();
8754
8755 if (Subtarget->hasMVEIntegerOps() &&
8756 Op.getValueType().getScalarSizeInBits() == 1)
8757 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8758
8759 if (getTypeAction(*DAG.getContext(), EltVT) ==
8761 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
8762 // but the type system will try to do that if we don't intervene.
8763 // Reinterpret any such vector-element insertion as one with the
8764 // corresponding integer types.
8765
8766 SDLoc dl(Op);
8767
8768 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
8769 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
8771
8772 SDValue VecIn = Op.getOperand(0);
8773 EVT VecVT = VecIn.getValueType();
8774 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
8775 VecVT.getVectorNumElements());
8776
8777 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
8778 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
8779 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
8780 IVecIn, IElt, Lane);
8781 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
8782 }
8783
8784 return Op;
8785}
8786
                                          const ARMSubtarget *ST) {
  EVT VecVT = Op.getOperand(0).getValueType();
  SDLoc dl(Op);

  // NOTE(review): message names LowerINSERT_VECTOR_ELT_i1 but this is the
  // EXTRACT path — looks like a copy-paste; confirm before changing.
  assert(ST->hasMVEIntegerOps() &&
         "LowerINSERT_VECTOR_ELT_i1 called without MVE!");

  // Move the predicate into a GPR and shift the requested lane's bit(s)
  // down to bit 0.
  SDValue Conv =
      DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
  unsigned Lane = Op.getConstantOperandVal(1);
  unsigned LaneWidth =
  SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
                              DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
  return Shift;
}
8804
// Custom-lower EXTRACT_VECTOR_ELT. Non-constant lane indices fall back to
// the generic expansion. MVE i1 predicates take the dedicated path above;
// sub-32-bit elements extracted into an i32 use ARMISD::VGETLANEu
// (presumably the zero-extending lane get — TODO confirm against the ISD
// node definition).
8806                                           const ARMSubtarget *ST) {
8807   // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
8808   SDValue Lane = Op.getOperand(1);
8809   if (!isa<ConstantSDNode>(Lane))
8810     return SDValue();
8811
8812   SDValue Vec = Op.getOperand(0);
8813   EVT VT = Vec.getValueType();
8814
8815   if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8816     return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
8817
8818   if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
8819     SDLoc dl(Op);
8820     return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
8821   }
8822
8823   return Op;
8824 }
8825
// Lower CONCAT_VECTORS of MVE i1 predicate vectors. Operands are combined
// pairwise by ConcatPair, and the operand list is halved each round until a
// single predicate remains.
8827                                       const ARMSubtarget *ST) {
8828   SDLoc dl(Op);
8829   assert(Op.getValueType().getScalarSizeInBits() == 1 &&
8830          "Unexpected custom CONCAT_VECTORS lowering");
8831   assert(isPowerOf2_32(Op.getNumOperands()) &&
8832          "Unexpected custom CONCAT_VECTORS lowering");
8833   assert(ST->hasMVEIntegerOps() &&
8834          "CONCAT_VECTORS lowering only supported for MVE");
8835
   // Concatenate two equal-typed i1 vectors into one predicate of double the
   // element count.
8836   auto ConcatPair = [&](SDValue V1, SDValue V2) {
8837     EVT Op1VT = V1.getValueType();
8838     EVT Op2VT = V2.getValueType();
8839     assert(Op1VT == Op2VT && "Operand types don't match!");
8840     assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
8841            "Unexpected i1 concat operations!");
8842     EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
8843
8844     SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
8845     SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
8846
8847     // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
8848     // promoted to v8i16, etc.
8849     MVT ElType =
8851     unsigned NumElts = 2 * Op1VT.getVectorNumElements();
8852
8853     EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
8854     if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
8855       // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
8856       // ConcatVT.
8857       SDValue ConVec =
8858           DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
8859       return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8860                          DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8861     }
8862
8863     // Extract the vector elements from Op1 and Op2 one by one and truncate them
8864     // to be the right size for the destination. For example, if Op1 is v4i1
8865     // then the promoted vector is v4i32. The result of concatenation gives a
8866     // v8i1, which when promoted is v8i16. That means each i32 element from Op1
8867     // needs truncating to i16 and inserting in the result.
8868     auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
8869       EVT NewVT = NewV.getValueType();
8870       EVT ConcatVT = ConVec.getValueType();
8871       unsigned ExtScale = 1;
       // v2f64 has no integer element access; view it as v4i32 and step by
       // two so we still read one value per original lane.
8872       if (NewVT == MVT::v2f64) {
8873         NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
8874         ExtScale = 2;
8875       }
8876       for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
8877         SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
8878                                   DAG.getIntPtrConstant(i * ExtScale, dl));
8879         ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
8880                              DAG.getConstant(j, dl, MVT::i32));
8881       }
8882       return ConVec;
8883     };
8884     unsigned j = 0;
8885     SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
8886     ConVec = ExtractInto(NewV1, ConVec, j);
8887     ConVec = ExtractInto(NewV2, ConVec, j);
8888
8889     // Now return the result of comparing the subvector with zero, which will
8890     // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8891     return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8892                        DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8893   };
8894
8895   // Concat each pair of subvectors and pack into the lower half of the array.
8896   SmallVector<SDValue> ConcatOps(Op->ops());
8897   while (ConcatOps.size() > 1) {
8898     for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
8899       SDValue V1 = ConcatOps[I];
8900       SDValue V2 = ConcatOps[I + 1];
8901       ConcatOps[I / 2] = ConcatPair(V1, V2);
8902     }
8903     ConcatOps.resize(ConcatOps.size() / 2);
8904   }
8905   return ConcatOps[0];
8906 }
8907
// Lower CONCAT_VECTORS. i1 predicate results take the MVE path above;
// otherwise the two 64-bit operands are bitcast to f64 and inserted as the
// two lanes of a v2f64, which is bitcast to the requested 128-bit type.
8909                                  const ARMSubtarget *ST) {
8910   EVT VT = Op->getValueType(0);
8911   if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8912     return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
8913
8914   // The only time a CONCAT_VECTORS operation can have legal types is when
8915   // two 64-bit vectors are concatenated to a 128-bit vector.
8916   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
8917          "unexpected CONCAT_VECTORS");
8918   SDLoc dl(Op);
8919   SDValue Val = DAG.getUNDEF(MVT::v2f64);
8920   SDValue Op0 = Op.getOperand(0);
8921   SDValue Op1 = Op.getOperand(1);
   // Undef halves are simply skipped, leaving the corresponding lane undef.
8922   if (!Op0.isUndef())
8923     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
8924                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
8925                       DAG.getIntPtrConstant(0, dl));
8926   if (!Op1.isUndef())
8927     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
8928                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
8929                       DAG.getIntPtrConstant(1, dl));
8930   return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
8931 }
8932
// Lower EXTRACT_SUBVECTOR of an MVE i1 predicate: promote the source
// predicate to an integer vector, copy the selected lanes into a fresh
// vector, then compare against zero to re-form a real predicate.
8934                                       const ARMSubtarget *ST) {
8935   SDValue V1 = Op.getOperand(0);
8936   SDValue V2 = Op.getOperand(1);
8937   SDLoc dl(Op);
8938   EVT VT = Op.getValueType();
8939   EVT Op1VT = V1.getValueType();
8940   unsigned NumElts = VT.getVectorNumElements();
8941   unsigned Index = V2->getAsZExtVal();
8942
8943   assert(VT.getScalarSizeInBits() == 1 &&
8944          "Unexpected custom EXTRACT_SUBVECTOR lowering");
8945   assert(ST->hasMVEIntegerOps() &&
8946          "EXTRACT_SUBVECTOR lowering only supported for MVE");
8947
8948   SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
8949
8950   // We now have Op1 promoted to a vector of integers, where v8i1 gets
8951   // promoted to v8i16, etc.
8952
8954
8955   if (NumElts == 2) {
     // v2i1 result: write each extracted element into two adjacent lanes
     // (j and j+1) so a v4i32 compare can stand in for the v2i1 result
     // before the final PREDICATE_CAST.
8956     EVT SubVT = MVT::v4i32;
8957     SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
8958     for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
8959       SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
8960                                 DAG.getIntPtrConstant(i, dl));
8961       SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
8962                            DAG.getConstant(j, dl, MVT::i32));
8963       SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
8964                            DAG.getConstant(j + 1, dl, MVT::i32));
8965     }
8966     SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
8967                               DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8968     return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8969   }
8970
8971   EVT SubVT = MVT::getVectorVT(ElType, NumElts);
8972   SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
8973   for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
8974     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
8975                               DAG.getIntPtrConstant(i, dl));
8976     SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
8977                          DAG.getConstant(j, dl, MVT::i32));
8978   }
8979
8980   // Now return the result of comparing the subvector with zero,
8981   // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8982   return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
8983                      DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8984 }
8985
8986// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
8988                                const ARMSubtarget *ST) {
8989   assert(ST->hasMVEIntegerOps() && "Expected MVE!");
8990   EVT VT = N->getValueType(0);
8991   assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
8992          "Expected a vector i1 type!");
8993   SDValue Op = N->getOperand(0);
8994   EVT FromVT = Op.getValueType();
8995   SDLoc DL(N);
8996
   // Only bit 0 of each lane is significant when truncating to i1, so mask
   // it out and test for non-zero.
8997   SDValue And =
8998       DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
8999   return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9000                      DAG.getCondCode(ISD::SETNE));
9001 }
9002
9004 const ARMSubtarget *Subtarget) {
9005 if (!Subtarget->hasMVEIntegerOps())
9006 return SDValue();
9007
9008 EVT ToVT = N->getValueType(0);
9009 if (ToVT.getScalarType() == MVT::i1)
9010 return LowerTruncatei1(N, DAG, Subtarget);
9011
9012 // MVE does not have a single instruction to perform the truncation of a v4i32
9013 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9014 // Most of the instructions in MVE follow the 'Beats' system, where moving
9015 // values from different lanes is usually something that the instructions
9016 // avoid.
9017 //
9018 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9019 // which take a the top/bottom half of a larger lane and extend it (or do the
9020 // opposite, truncating into the top/bottom lane from a larger lane). Note
9021 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9022 // bottom 16bits from each vector lane. This works really well with T/B
9023 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9024 // to move order.
9025 //
9026 // But truncates and sext/zext are always going to be fairly common from llvm.
9027 // We have several options for how to deal with them:
9028 // - Wherever possible combine them into an instruction that makes them
9029 // "free". This includes loads/stores, which can perform the trunc as part
9030 // of the memory operation. Or certain shuffles that can be turned into
9031 // VMOVN/VMOVL.
9032 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9033 // trunc(mul(sext(a), sext(b))) may become
9034 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9035 // this case can use VMULL). This is performed in the
9036 // MVELaneInterleavingPass.
9037 // - Otherwise we have an option. By default we would expand the
9038 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9039 // registers. One for each vector lane in the vector. This can obviously be
9040 // very expensive.
9041 // - The other option is to use the fact that loads/store can extend/truncate
9042 // to turn a trunc into two truncating stack stores and a stack reload. This
9043 // becomes 3 back-to-back memory operations, but at least that is less than
9044 // all the insert/extracts.
9045 //
9046 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9047 // are either optimized where they can be, or eventually lowered into stack
9048 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9049 // two early, where other instructions would be better, and stops us from
9050 // having to reconstruct multiple buildvector shuffles into loads/stores.
9051 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9052 return SDValue();
9053 EVT FromVT = N->getOperand(0).getValueType();
9054 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9055 return SDValue();
9056
9057 SDValue Lo, Hi;
9058 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9059 SDLoc DL(N);
9060 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9061}
9062
9064 const ARMSubtarget *Subtarget) {
9065 if (!Subtarget->hasMVEIntegerOps())
9066 return SDValue();
9067
9068 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9069
9070 EVT ToVT = N->getValueType(0);
9071 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9072 return SDValue();
9073 SDValue Op = N->getOperand(0);
9074 EVT FromVT = Op.getValueType();
9075 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9076 return SDValue();
9077
9078 SDLoc DL(N);
9079 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9080 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9081 ExtVT = MVT::v8i16;
9082
9083 unsigned Opcode =
9085 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9086 SDValue Ext1 = Ext.getValue(1);
9087
9088 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9089 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9090 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9091 }
9092
9093 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9094}
9095
9096/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9097/// element has been zero/sign-extended, depending on the isSigned parameter,
9098/// from an integer type half its size.
9100                                    bool isSigned) {
9101   // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9102   EVT VT = N->getValueType(0);
9103   if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9104     SDNode *BVN = N->getOperand(0).getNode();
9105     if (BVN->getValueType(0) != MVT::v4i32 ||
9106         BVN->getOpcode() != ISD::BUILD_VECTOR)
9107       return false;
9108     unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9109     unsigned HiElt = 1 - LoElt;
     // Lo0/Hi0 and Lo1/Hi1 hold the 32-bit halves of the two packed i64
     // elements as constants (null when the operand is not a constant);
     // their declarations are elided in this view.
9114     if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9115       return false;
9116     if (isSigned) {
       // High half must be the sign-extension of the low half.
9117       if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9118           Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9119         return true;
9120     } else {
9121       if (Hi0->isZero() && Hi1->isZero())
9122         return true;
9123     }
9124     return false;
9125   }
9126
9127   if (N->getOpcode() != ISD::BUILD_VECTOR)
9128     return false;
9129
   // Every element must be a constant that fits in half the element width.
9130   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9131     SDNode *Elt = N->getOperand(i).getNode();
9133       unsigned EltSize = VT.getScalarSizeInBits();
9134       unsigned HalfSize = EltSize / 2;
9135       if (isSigned) {
9136         if (!isIntN(HalfSize, C->getSExtValue()))
9137           return false;
9138       } else {
9139         if (!isUIntN(HalfSize, C->getZExtValue()))
9140           return false;
9141       }
9142       continue;
9143     }
9144     return false;
9145   }
9146
9147   return true;
9148 }
9149
9150/// isSignExtended - Check if a node is a vector value that is sign-extended
9151/// or a constant BUILD_VECTOR with sign-extended elements.
   // Sign-extending loads count as sign-extended values too.
9153   if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9154     return true;
9155   if (isExtendedBUILD_VECTOR(N, DAG, true))
9156     return true;
9157   return false;
9158 }
9159
9160/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9161/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
   // ANY_EXTEND is acceptable: its high bits are undefined, so treating them
   // as zero is legal.
9163   if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9165     return true;
9166   if (isExtendedBUILD_VECTOR(N, DAG, false))
9167     return true;
9168   return false;
9169 }
9170
9171static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9172 if (OrigVT.getSizeInBits() >= 64)
9173 return OrigVT;
9174
9175 assert(OrigVT.isSimple() && "Expecting a simple value type");
9176
9177 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9178 switch (OrigSimpleTy) {
9179 default: llvm_unreachable("Unexpected Vector Type");
9180 case MVT::v2i8:
9181 case MVT::v2i16:
9182 return MVT::v2i32;
9183 case MVT::v4i8:
9184 return MVT::v4i16;
9185 }
9186}
9187
9188/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9189/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9190/// We insert the required extension here to get the vector to fill a D register.
9192                                             const EVT &OrigTy,
9193                                             const EVT &ExtTy,
9194                                             unsigned ExtOpcode) {
9195   // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9196   // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9197   // 64-bits we need to insert a new extension so that it will be 64-bits.
9198   assert(ExtTy.is128BitVector() && "Unexpected extension size");
9199   if (OrigTy.getSizeInBits() >= 64)
9200     return N;
9201
9202   // Must extend size to at least 64 bits to be used as an operand for VMULL.
9203   EVT NewVT = getExtensionTo64Bits(OrigTy);
9204
   // Re-extend with the same extension kind the caller stripped, but only to
   // the 64-bit type rather than the full 128-bit ExtTy.
9205   return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9206 }
9207
9208/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9209/// does not do any sign/zero extension. If the original vector is less
9210/// than 64 bits, an appropriate extension will be added after the load to
9211/// reach a total size of 64 bits. We have to add the extension separately
9212/// because ARM does not have a sign/zero extending load for vectors.
9214   EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9215
9216   // The load already has the right type.
   // Preserve the original pointer, alignment and MMO flags on the
   // replacement (non-extending) load.
9217   if (ExtendedTy == LD->getMemoryVT())
9218     return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9219                        LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9220                        LD->getMemOperand()->getFlags());
9221
9222   // We need to create a zextload/sextload. We cannot just create a load
9223   // followed by a zext/zext node because LowerMUL is also run during normal
9224   // operation legalization where we can't create illegal types.
9225   return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9226                         LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9227                         LD->getMemoryVT(), LD->getAlign(),
9228                         LD->getMemOperand()->getFlags());
9229 }
9230
9231/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9232/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9233/// the unextended value. The unextended vector should be 64 bits so that it can
9234/// be used as an operand to a VMULL instruction. If the original vector size
9235/// before extension is less than 64 bits we add a an extension to resize
9236/// the vector to 64 bits.
9238   if (N->getOpcode() == ISD::SIGN_EXTEND ||
9239       N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9240     return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9241                                         N->getOperand(0)->getValueType(0),
9242                                         N->getValueType(0),
9243                                         N->getOpcode());
9244
9245   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9246     assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9247            "Expected extending load");
9248
9249     SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9250     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9251     unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9252     SDValue extLoad =
9253         DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9254     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9255
     // Other users of the old load now see the re-extended value (and the
     // new chain), while the VMULL path consumes the narrow unextended load.
9256     return newLoad;
9257   }
9258
9259   // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9260   // have been legalized as a BITCAST from v4i32.
9261   if (N->getOpcode() == ISD::BITCAST) {
9262     SDNode *BVN = N->getOperand(0).getNode();
9264            BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
     // Pick elements LowElt and LowElt+2: the low 32-bit halves of the two
     // packed i64 lanes, honouring endianness.
9265     unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9266     return DAG.getBuildVector(
9267         MVT::v2i32, SDLoc(N),
9268         {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9269   }
9270   // Construct a new BUILD_VECTOR with elements truncated to half the size.
9271   assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9272   EVT VT = N->getValueType(0);
9273   unsigned EltSize = VT.getScalarSizeInBits() / 2;
9274   unsigned NumElts = VT.getVectorNumElements();
9275   MVT TruncVT = MVT::getIntegerVT(EltSize);
9277   SDLoc dl(N);
9278   for (unsigned i = 0; i != NumElts; ++i) {
9279     const APInt &CInt = N->getConstantOperandAPInt(i);
9280     // Element types smaller than 32 bits are not legal, so use i32 elements.
9281     // The values are implicitly truncated so sext vs. zext doesn't matter.
9282     Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9283   }
9284   return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9285 }
9286
9287static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9288 unsigned Opcode = N->getOpcode();
9289 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9290 SDNode *N0 = N->getOperand(0).getNode();
9291 SDNode *N1 = N->getOperand(1).getNode();
9292 return N0->hasOneUse() && N1->hasOneUse() &&
9293 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9294 }
9295 return false;
9296}
9297
9298static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9299 unsigned Opcode = N->getOpcode();
9300 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9301 SDNode *N0 = N->getOperand(0).getNode();
9302 SDNode *N1 = N->getOperand(1).getNode();
9303 return N0->hasOneUse() && N1->hasOneUse() &&
9304 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9305 }
9306 return false;
9307}
9308
// Custom-lower ISD::MUL so that 128-bit multiplies of extended 64-bit
// vectors can be selected as VMULLs/VMULLu, optionally splitting
// (ext A +/- ext B) * ext C into two VMULLs (the isMLA case) for better
// back-to-back vmull/vmlal scheduling.
9310   // Multiplications are only custom-lowered for 128-bit vectors so that
9311   // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9312   EVT VT = Op.getValueType();
9313   assert(VT.is128BitVector() && VT.isInteger() &&
9314          "unexpected type for custom-lowering ISD::MUL");
9315   SDNode *N0 = Op.getOperand(0).getNode();
9316   SDNode *N1 = Op.getOperand(1).getNode();
9317   unsigned NewOpc = 0;
9318   bool isMLA = false;
9319   bool isN0SExt = isSignExtended(N0, DAG);
9320   bool isN1SExt = isSignExtended(N1, DAG);
9321   if (isN0SExt && isN1SExt)
9322     NewOpc = ARMISD::VMULLs;
9323   else {
9324     bool isN0ZExt = isZeroExtended(N0, DAG);
9325     bool isN1ZExt = isZeroExtended(N1, DAG);
9326     if (isN0ZExt && isN1ZExt)
9327       NewOpc = ARMISD::VMULLu;
9328     else if (isN1SExt || isN1ZExt) {
9329       // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9330       // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9331       if (isN1SExt && isAddSubSExt(N0, DAG)) {
9332         NewOpc = ARMISD::VMULLs;
9333         isMLA = true;
9334       } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9335         NewOpc = ARMISD::VMULLu;
9336         isMLA = true;
9337       } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
       // Commutative case: the add/sub is on the other side.
9338         std::swap(N0, N1);
9339         NewOpc = ARMISD::VMULLu;
9340         isMLA = true;
9341       }
9342     }
9343
9344     if (!NewOpc) {
9345       if (VT == MVT::v2i64)
9346         // Fall through to expand this.  It is not legal.
9347         return SDValue();
9348       else
9349         // Other vector multiplications are legal.
9350         return Op;
9351     }
9352   }
9353
9354   // Legalize to a VMULL instruction.
9355   SDLoc DL(Op);
9356   SDValue Op0;
9357   SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9358   if (!isMLA) {
9359     Op0 = SkipExtensionForVMULL(N0, DAG);
9361            Op1.getValueType().is64BitVector() &&
9362            "unexpected types for extended operands to VMULL");
9363     return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9364   }
9365
9366   // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9367   // isel lowering to take advantage of no-stall back to back vmul + vmla.
9368   // vmull q0, d4, d6
9369   // vmlal q0, d5, d6
9370   // is faster than
9371   // vaddl q0, d4, d5
9372   // vmovl q1, d6
9373   // vmul  q0, q0, q1
9374   SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9375   SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9376   EVT Op1VT = Op1.getValueType();
   // Rebuild as (A * C) op (B * C) where op is the original ADD/SUB.
9377   return DAG.getNode(N0->getOpcode(), DL, VT,
9378                      DAG.getNode(NewOpc, DL, VT,
9379                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9380                      DAG.getNode(NewOpc, DL, VT,
9381                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9382 }
9383
// Helper for LowerSDIV: divide two vectors of 8-bit signed values (widened
// into v4i16 lanes) via v4f32 arithmetic using the NEON reciprocal estimate.
// The 8-bit range is small enough that no Newton refinement step is needed.
9385                            SelectionDAG &DAG) {
9386   // TODO: Should this propagate fast-math-flags?
9387
9388   // Convert to float
9389   // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9390   // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9391   X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9392   Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9393   X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9394   Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9395   // Get reciprocal estimate.
9396   // float4 recip = vrecpeq_f32(yf);
9397   Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9398                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9399                   Y);
9400   // Because char has a smaller range than uchar, we can actually get away
9401   // without any newton steps.  This requires that we use a weird bias
9402   // of 0xb000, however (again, this has been exhaustively tested).
9403   // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9404   X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9405   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9406   Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9407   X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9408   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9409   // Convert back to short.
9410   X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9411   X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9412   return X;
9413 }
9414
// Helper for LowerSDIV (and reused by LowerUDIV for non-negative inputs):
// divide v4i16 values via v4f32 arithmetic using the reciprocal estimate
// plus one Newton-Raphson refinement step.
9416                             SelectionDAG &DAG) {
9417   // TODO: Should this propagate fast-math-flags?
9418
9419   SDValue N2;
9420   // Convert to float.
9421   // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9422   // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9423   N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9424   N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9425   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9426   N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9427
9428   // Use reciprocal estimate and one refinement step.
9429   // float4 recip = vrecpeq_f32(yf);
9430   // recip *= vrecpsq_f32(yf, recip);
9431   N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9432                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9433                    N1);
9434   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9435                    DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9436                    N1, N2);
9437   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9438   // Because short has a smaller range than ushort, we can actually get away
9439   // with only a single newton step.  This requires that we use a weird bias
9440   // of 89, however (again, this has been exhaustively tested).
9441   // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9442   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9443   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9444   N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9445   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9446   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9447   // Convert back to integer and return.
9448   // return vmovn_s32(vcvt_s32_f32(result));
9449   N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9450   N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9451   return N0;
9452 }
9453
// Custom-lower ISD::SDIV for v4i16 and v8i8. A v8i8 divide is sign-extended
// to v8i16, split into two v4i16 halves that are divided separately, then
// re-concatenated and truncated back to v8i8.
9455                           const ARMSubtarget *ST) {
9456   EVT VT = Op.getValueType();
9457   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9458          "unexpected type for custom-lowering ISD::SDIV");
9459
9460   SDLoc dl(Op);
9461   SDValue N0 = Op.getOperand(0);
9462   SDValue N1 = Op.getOperand(1);
9463   SDValue N2, N3;
9464
9465   if (VT == MVT::v8i8) {
9466     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9467     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9468
     // Split each operand into its high (N2/N3) and low (N0/N1) v4i16 halves.
9469     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9470                      DAG.getIntPtrConstant(4, dl));
9471     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9472                      DAG.getIntPtrConstant(4, dl));
9473     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9474                      DAG.getIntPtrConstant(0, dl));
9475     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9476                      DAG.getIntPtrConstant(0, dl));
9477
9478     N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9479     N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9480
9481     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9482     N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9483
9484     N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9485     return N0;
9486   }
9487   return LowerSDIV_v4i16(N0, N1, dl, DAG);
9488 }
9489
// Custom-lower ISD::UDIV for v4i16 and v8i8 using the same float-reciprocal
// technique as SDIV, with two refinement steps for the full 16-bit unsigned
// range.
9491                           const ARMSubtarget *ST) {
9492   // TODO: Should this propagate fast-math-flags?
9493   EVT VT = Op.getValueType();
9494   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9495          "unexpected type for custom-lowering ISD::UDIV");
9496
9497   SDLoc dl(Op);
9498   SDValue N0 = Op.getOperand(0);
9499   SDValue N1 = Op.getOperand(1);
9500   SDValue N2, N3;
9501
9502   if (VT == MVT::v8i8) {
9503     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9504     N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9505
     // Split each operand into its high (N2/N3) and low (N0/N1) v4i16 halves.
9506     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9507                      DAG.getIntPtrConstant(4, dl));
9508     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9509                      DAG.getIntPtrConstant(4, dl));
9510     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9511                      DAG.getIntPtrConstant(0, dl));
9512     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9513                      DAG.getIntPtrConstant(0, dl));
9514
     // The signed helper is safe here: both inputs were zero-extended from
     // 8 bits above, so every lane is non-negative and within signed range.
9515     N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9516     N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9517
9518     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9519     N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9520
9521     N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9522                      DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9523                                      MVT::i32),
9524                      N0);
9525     return N0;
9526   }
9527
9528   // v4i16 sdiv ... Convert to float.
9529   // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9530   // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9531   N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9532   N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9533   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9534   SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9535
9536   // Use reciprocal estimate and two refinement steps.
9537   // float4 recip = vrecpeq_f32(yf);
9538   // recip *= vrecpsq_f32(yf, recip);
9539   // recip *= vrecpsq_f32(yf, recip);
9540   N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9541                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9542                    BN1);
9543   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9544                    DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9545                    BN1, N2);
9546   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9547   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9548                    DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9549                    BN1, N2);
9550   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9551   // Simply multiplying by the reciprocal estimate can leave us a few ulps
9552   // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9553   // and that it will never cause us to return an answer too large).
9554   // float4 result = as_float4(as_int4(xf*recip) + 2);
9555   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9556   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9557   N1 = DAG.getConstant(2, dl, MVT::v4i32);
9558   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9559   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9560   // Convert back to integer and return.
9561   // return vmovn_u32(vcvt_s32_f32(result));
9562   N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9563   N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9564   return N0;
9565 }
9566
// Lower UADDO_CARRY/USUBO_CARRY to ARMISD::ADDE/SUBE, converting between the
// generic nodes' boolean carry/borrow convention and the ARM carry flag
// (which for subtraction is the inverse of a borrow).
9568   SDNode *N = Op.getNode();
9569   EVT VT = N->getValueType(0);
9570   SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9571
9572   SDValue Carry = Op.getOperand(2);
9573
9574   SDLoc DL(Op);
9575
9576   SDValue Result;
9577   if (Op.getOpcode() == ISD::UADDO_CARRY) {
9578     // This converts the boolean value carry into the carry flag.
9579     Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9580
9581     // Do the addition proper using the carry flag we wanted.
9582     Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9583                          Op.getOperand(1), Carry);
9584
9585     // Now convert the carry flag into a boolean value.
9586     Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9587   } else {
9588     // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
9589     // have to invert the carry first.
9590     Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9591                         DAG.getConstant(1, DL, MVT::i32), Carry);
9592     // This converts the boolean value carry into the carry flag.
9593     Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9594
9595     // Do the subtraction proper using the carry flag we wanted.
9596     Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9597                          Op.getOperand(1), Carry);
9598
9599     // Now convert the carry flag into a boolean value.
9600     Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9601     // But the carry returned by ARMISD::SUBE is not a borrow as expected
9602     // by ISD::USUBO_CARRY, so compute 1 - C.
9603     Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9604                         DAG.getConstant(1, DL, MVT::i32), Carry);
9605   }
9606
9607   // Return both values.
9608   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9609 }
9610
9611SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9612 bool Signed,
9613 SDValue &Chain) const {
9614 EVT VT = Op.getValueType();
9615 assert((VT == MVT::i32 || VT == MVT::i64) &&
9616 "unexpected type for custom lowering DIV");
9617 SDLoc dl(Op);
9618
9619 const auto &DL = DAG.getDataLayout();
9620 RTLIB::Libcall LC;
9621 if (Signed)
9622 LC = VT == MVT::i32 ? RTLIB::SDIVREM_I32 : RTLIB::SDIVREM_I64;
9623 else
9624 LC = VT == MVT::i32 ? RTLIB::UDIVREM_I32 : RTLIB::UDIVREM_I64;
9625
9626 const char *Name = getLibcallName(LC);
9627 SDValue ES = DAG.getExternalSymbol(Name, getPointerTy(DL));
9628
9630
9631 for (auto AI : {1, 0}) {
9632 SDValue Operand = Op.getOperand(AI);
9633 Args.emplace_back(Operand,
9634 Operand.getValueType().getTypeForEVT(*DAG.getContext()));
9635 }
9636
9637 CallLoweringInfo CLI(DAG);
9638 CLI.setDebugLoc(dl)
9639 .setChain(Chain)
9641 ES, std::move(Args));
9642
9643 return LowerCallTo(CLI).first;
9644}
9645
9646// This is a code size optimisation: return the original SDIV node to
9647// DAGCombiner when we don't want to expand SDIV into a sequence of
9648// instructions, and an empty node otherwise which will cause the
9649// SDIV to be expanded in DAGCombine.
9650SDValue
9651ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9652 SelectionDAG &DAG,
9653 SmallVectorImpl<SDNode *> &Created) const {
9654 // TODO: Support SREM
9655 if (N->getOpcode() != ISD::SDIV)
9656 return SDValue();
9657
9658 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
9659 const bool MinSize = ST.hasMinSize();
9660 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9661 : ST.hasDivideInARMMode();
9662
9663 // Don't touch vector types; rewriting this may lead to scalarizing
9664 // the int divs.
9665 if (N->getOperand(0).getValueType().isVector())
9666 return SDValue();
9667
9668 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9669 // hwdiv support for this to be really profitable.
9670 if (!(MinSize && HasDivide))
9671 return SDValue();
9672
9673 // ARM mode is a bit simpler than Thumb: we can handle large power
9674 // of 2 immediates with 1 mov instruction; no further checks required,
9675 // just return the sdiv node.
9676 if (!ST.isThumb())
9677 return SDValue(N, 0);
9678
9679 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9680 // and thus lose the code size benefits of a MOVS that requires only 2.
9681 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9682 // but as it's doing exactly this, it's not worth the trouble to get TTI.
9683 if (Divisor.sgt(128))
9684 return SDValue();
9685
9686 return SDValue(N, 0);
9687}
9688
9689SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9690 bool Signed) const {
9691 assert(Op.getValueType() == MVT::i32 &&
9692 "unexpected type for custom lowering DIV");
9693 SDLoc dl(Op);
9694
9695 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9696 DAG.getEntryNode(), Op.getOperand(1));
9697
9698 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9699}
9700
9702 SDLoc DL(N);
9703 SDValue Op = N->getOperand(1);
9704 if (N->getValueType(0) == MVT::i32)
9705 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
9706 SDValue Lo, Hi;
9707 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
9708 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
9709 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
9710}
9711
9712void ARMTargetLowering::ExpandDIV_Windows(
9713 SDValue Op, SelectionDAG &DAG, bool Signed,
9715 const auto &DL = DAG.getDataLayout();
9716
9717 assert(Op.getValueType() == MVT::i64 &&
9718 "unexpected type for custom lowering DIV");
9719 SDLoc dl(Op);
9720
9721 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
9722
9723 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9724
9725 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
9726 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
9727 DAG.getConstant(32, dl, getPointerTy(DL)));
9728 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
9729
9730 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
9731}
9732
9734 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
9735 EVT MemVT = LD->getMemoryVT();
9736 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9737 MemVT == MVT::v16i1) &&
9738 "Expected a predicate type!");
9739 assert(MemVT == Op.getValueType());
9740 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
9741 "Expected a non-extending load");
9742 assert(LD->isUnindexed() && "Expected a unindexed load");
9743
9744 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
9745 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
9746 // need to make sure that 8/4/2 bits are actually loaded into the correct
9747 // place, which means loading the value and then shuffling the values into
9748 // the bottom bits of the predicate.
9749 // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
9750 // for BE).
9751 // Speaking of BE, apparently the rest of llvm will assume a reverse order to
9752 // a natural VMSR(load), so needs to be reversed.
9753
9754 SDLoc dl(Op);
9755 SDValue Load = DAG.getExtLoad(
9756 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
9758 LD->getMemOperand());
9759 SDValue Val = Load;
9760 if (DAG.getDataLayout().isBigEndian())
9761 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
9762 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
9763 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
9764 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
9765 if (MemVT != MVT::v16i1)
9766 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
9767 DAG.getConstant(0, dl, MVT::i32));
9768 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
9769}
9770
9771void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
9772 SelectionDAG &DAG) const {
9773 LoadSDNode *LD = cast<LoadSDNode>(N);
9774 EVT MemVT = LD->getMemoryVT();
9775 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
9776
9777 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9778 !Subtarget->isThumb1Only() && LD->isVolatile() &&
9779 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
9780 SDLoc dl(N);
9782 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
9783 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
9784 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
9785 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
9786 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
9787 Results.append({Pair, Result.getValue(2)});
9788 }
9789}
9790
9792 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
9793 EVT MemVT = ST->getMemoryVT();
9794 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9795 MemVT == MVT::v16i1) &&
9796 "Expected a predicate type!");
9797 assert(MemVT == ST->getValue().getValueType());
9798 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
9799 assert(ST->isUnindexed() && "Expected a unindexed store");
9800
9801 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
9802 // top bits unset and a scalar store.
9803 SDLoc dl(Op);
9804 SDValue Build = ST->getValue();
9805 if (MemVT != MVT::v16i1) {
9807 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
9808 unsigned Elt = DAG.getDataLayout().isBigEndian()
9809 ? MemVT.getVectorNumElements() - I - 1
9810 : I;
9811 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
9812 DAG.getConstant(Elt, dl, MVT::i32)));
9813 }
9814 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
9815 Ops.push_back(DAG.getUNDEF(MVT::i32));
9816 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
9817 }
9818 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
9819 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
9820 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
9821 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
9822 DAG.getConstant(16, dl, MVT::i32));
9823 return DAG.getTruncStore(
9824 ST->getChain(), dl, GRP, ST->getBasePtr(),
9826 ST->getMemOperand());
9827}
9828
9830 const ARMSubtarget *Subtarget) {
9831 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
9832 EVT MemVT = ST->getMemoryVT();
9833 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
9834
9835 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9836 !Subtarget->isThumb1Only() && ST->isVolatile() &&
9837 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
9838 SDNode *N = Op.getNode();
9839 SDLoc dl(N);
9840
9841 SDValue Lo = DAG.getNode(
9842 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
9843 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
9844 MVT::i32));
9845 SDValue Hi = DAG.getNode(
9846 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
9847 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
9848 MVT::i32));
9849
9850 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
9851 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
9852 MemVT, ST->getMemOperand());
9853 } else if (Subtarget->hasMVEIntegerOps() &&
9854 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9855 MemVT == MVT::v16i1))) {
9856 return LowerPredicateStore(Op, DAG);
9857 }
9858
9859 return SDValue();
9860}
9861
9862static bool isZeroVector(SDValue N) {
9863 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
9864 (N->getOpcode() == ARMISD::VMOVIMM &&
9865 isNullConstant(N->getOperand(0))));
9866}
9867
9870 MVT VT = Op.getSimpleValueType();
9871 SDValue Mask = N->getMask();
9872 SDValue PassThru = N->getPassThru();
9873 SDLoc dl(Op);
9874
9875 if (isZeroVector(PassThru))
9876 return Op;
9877
9878 // MVE Masked loads use zero as the passthru value. Here we convert undef to
9879 // zero too, and other values are lowered to a select.
9880 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
9881 DAG.getTargetConstant(0, dl, MVT::i32));
9882 SDValue NewLoad = DAG.getMaskedLoad(
9883 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
9884 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
9885 N->getExtensionType(), N->isExpandingLoad());
9886 SDValue Combo = NewLoad;
9887 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
9888 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
9889 isZeroVector(PassThru->getOperand(0));
9890 if (!PassThru.isUndef() && !PassThruIsCastZero)
9891 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
9892 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
9893}
9894
9896 const ARMSubtarget *ST) {
9897 if (!ST->hasMVEIntegerOps())
9898 return SDValue();
9899
9900 SDLoc dl(Op);
9901 unsigned BaseOpcode = 0;
9902 switch (Op->getOpcode()) {
9903 default: llvm_unreachable("Expected VECREDUCE opcode");
9904 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
9905 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
9906 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
9907 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
9908 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
9909 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
9910 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
9911 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
9912 }
9913
9914 SDValue Op0 = Op->getOperand(0);
9915 EVT VT = Op0.getValueType();
9916 EVT EltVT = VT.getVectorElementType();
9917 unsigned NumElts = VT.getVectorNumElements();
9918 unsigned NumActiveLanes = NumElts;
9919
9920 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
9921 NumActiveLanes == 2) &&
9922 "Only expected a power 2 vector size");
9923
9924 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
9925 // allows us to easily extract vector elements from the lanes.
9926 while (NumActiveLanes > 4) {
9927 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
9928 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
9929 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
9930 NumActiveLanes /= 2;
9931 }
9932
9933 SDValue Res;
9934 if (NumActiveLanes == 4) {
9935 // The remaining 4 elements are summed sequentially
9936 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9937 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
9938 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9939 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
9940 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9941 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
9942 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9943 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
9944 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
9945 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
9946 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
9947 } else {
9948 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9949 DAG.getConstant(0, dl, MVT::i32));
9950 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9951 DAG.getConstant(1, dl, MVT::i32));
9952 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
9953 }
9954
9955 // Result type may be wider than element type.
9956 if (EltVT != Op->getValueType(0))
9957 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
9958 return Res;
9959}
9960
9962 const ARMSubtarget *ST) {
9963 if (!ST->hasMVEFloatOps())
9964 return SDValue();
9965 return LowerVecReduce(Op, DAG, ST);
9966}
9967
9969 const ARMSubtarget *ST) {
9970 if (!ST->hasNEON())
9971 return SDValue();
9972
9973 SDLoc dl(Op);
9974 SDValue Op0 = Op->getOperand(0);
9975 EVT VT = Op0.getValueType();
9976 EVT EltVT = VT.getVectorElementType();
9977
9978 unsigned PairwiseIntrinsic = 0;
9979 switch (Op->getOpcode()) {
9980 default:
9981 llvm_unreachable("Expected VECREDUCE opcode");
9982 case ISD::VECREDUCE_UMIN:
9983 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
9984 break;
9985 case ISD::VECREDUCE_UMAX:
9986 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
9987 break;
9988 case ISD::VECREDUCE_SMIN:
9989 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
9990 break;
9991 case ISD::VECREDUCE_SMAX:
9992 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
9993 break;
9994 }
9995 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
9996
9997 unsigned NumElts = VT.getVectorNumElements();
9998 unsigned NumActiveLanes = NumElts;
9999
10000 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10001 NumActiveLanes == 2) &&
10002 "Only expected a power 2 vector size");
10003
10004 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10005 if (VT.is128BitVector()) {
10006 SDValue Lo, Hi;
10007 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10008 VT = Lo.getValueType();
10009 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10010 NumActiveLanes /= 2;
10011 }
10012
10013 // Use pairwise reductions until one lane remains
10014 while (NumActiveLanes > 1) {
10015 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10016 NumActiveLanes /= 2;
10017 }
10018
10019 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10020 DAG.getConstant(0, dl, MVT::i32));
10021
10022 // Result type may be wider than element type.
10023 if (EltVT != Op.getValueType()) {
10024 unsigned Extend = 0;
10025 switch (Op->getOpcode()) {
10026 default:
10027 llvm_unreachable("Expected VECREDUCE opcode");
10028 case ISD::VECREDUCE_UMIN:
10029 case ISD::VECREDUCE_UMAX:
10030 Extend = ISD::ZERO_EXTEND;
10031 break;
10032 case ISD::VECREDUCE_SMIN:
10033 case ISD::VECREDUCE_SMAX:
10034 Extend = ISD::SIGN_EXTEND;
10035 break;
10036 }
10037 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10038 }
10039 return Res;
10040}
10041
10043 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10044 // Acquire/Release load/store is not legal for targets without a dmb or
10045 // equivalent available.
10046 return SDValue();
10047
10048 // Monotonic load/store is legal for all targets.
10049 return Op;
10050}
10051
10054 SelectionDAG &DAG,
10055 const ARMSubtarget *Subtarget) {
10056 SDLoc DL(N);
10057 // Under Power Management extensions, the cycle-count is:
10058 // mrc p15, #0, <Rt>, c9, c13, #0
10059 SDValue Ops[] = { N->getOperand(0), // Chain
10060 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10061 DAG.getTargetConstant(15, DL, MVT::i32),
10062 DAG.getTargetConstant(0, DL, MVT::i32),
10063 DAG.getTargetConstant(9, DL, MVT::i32),
10064 DAG.getTargetConstant(13, DL, MVT::i32),
10065 DAG.getTargetConstant(0, DL, MVT::i32)
10066 };
10067
10068 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10069 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10070 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10071 DAG.getConstant(0, DL, MVT::i32)));
10072 Results.push_back(Cycles32.getValue(1));
10073}
10074
10076 SDValue V1) {
10077 SDLoc dl(V0.getNode());
10078 SDValue RegClass =
10079 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10080 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10081 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10082 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10083 return SDValue(
10084 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10085}
10086
10088 SDLoc dl(V.getNode());
10089 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10090 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10091 if (isBigEndian)
10092 std::swap(VLo, VHi);
10093 return createGPRPairNode2xi32(DAG, VLo, VHi);
10094}
10095
10098 SelectionDAG &DAG) {
10099 assert(N->getValueType(0) == MVT::i64 &&
10100 "AtomicCmpSwap on types less than 64 should be legal");
10101 SDValue Ops[] = {
10102 createGPRPairNode2xi32(DAG, N->getOperand(1),
10103 DAG.getUNDEF(MVT::i32)), // pointer, temp
10104 createGPRPairNodei64(DAG, N->getOperand(2)), // expected
10105 createGPRPairNodei64(DAG, N->getOperand(3)), // new
10106 N->getOperand(0), // chain in
10107 };
10108 SDNode *CmpSwap = DAG.getMachineNode(
10109 ARM::CMP_SWAP_64, SDLoc(N),
10110 DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
10111
10112 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10113 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10114
10115 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10116
10117 SDValue Lo =
10118 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10119 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10120 SDValue Hi =
10121 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10122 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10123 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10124 Results.push_back(SDValue(CmpSwap, 2));
10125}
10126
10127SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10128 SDLoc dl(Op);
10129 EVT VT = Op.getValueType();
10130 SDValue Chain = Op.getOperand(0);
10131 SDValue LHS = Op.getOperand(1);
10132 SDValue RHS = Op.getOperand(2);
10133 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10134 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10135
10136 // If we don't have instructions of this float type then soften to a libcall
10137 // and use SETCC instead.
10138 if (isUnsupportedFloatingType(LHS.getValueType())) {
10139 softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS,
10140 Chain, IsSignaling);
10141 if (!RHS.getNode()) {
10142 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10143 CC = ISD::SETNE;
10144 }
10145 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10146 DAG.getCondCode(CC));
10147 return DAG.getMergeValues({Result, Chain}, dl);
10148 }
10149
10150 ARMCC::CondCodes CondCode, CondCode2;
10151 FPCCToARMCC(CC, CondCode, CondCode2);
10152
10153 SDValue True = DAG.getConstant(1, dl, VT);
10154 SDValue False = DAG.getConstant(0, dl, VT);
10155 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10156 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10157 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10158 if (CondCode2 != ARMCC::AL) {
10159 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10160 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10161 }
10162 return DAG.getMergeValues({Result, Chain}, dl);
10163}
10164
10165SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10166 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10167
10168 EVT VT = getPointerTy(DAG.getDataLayout());
10169 int FI = MFI.CreateFixedObject(4, 0, false);
10170 return DAG.getFrameIndex(FI, VT);
10171}
10172
10173SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
10174 SelectionDAG &DAG) const {
10175 SDLoc DL(Op);
10176 MakeLibCallOptions CallOptions;
10177 MVT SVT = Op.getOperand(0).getSimpleValueType();
10178 RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
10179 SDValue Res =
10180 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
10181 return DAG.getBitcast(MVT::i32, Res);
10182}
10183
10184SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {
10185 SDLoc dl(Op);
10186 SDValue LHS = Op.getOperand(0);
10187 SDValue RHS = Op.getOperand(1);
10188
10189 // Determine if this is signed or unsigned comparison
10190 bool IsSigned = (Op.getOpcode() == ISD::SCMP);
10191
10192 // Special case for Thumb1 UCMP only
10193 if (!IsSigned && Subtarget->isThumb1Only()) {
10194 // For Thumb unsigned comparison, use this sequence:
10195 // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags
10196 // sbc r2, r2 ; r2 = r2 - r2 - !carry
10197 // cmp r1, r0 ; compare RHS with LHS
10198 // sbc r1, r1 ; r1 = r1 - r1 - !carry
10199 // subs r0, r2, r1 ; r0 = r2 - r1 (final result)
10200
10201 // First subtraction: LHS - RHS
10202 SDValue Sub1WithFlags = DAG.getNode(
10203 ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
10204 SDValue Sub1Result = Sub1WithFlags.getValue(0);
10205 SDValue Flags1 = Sub1WithFlags.getValue(1);
10206
10207 // SUBE: Sub1Result - Sub1Result - !carry
10208 // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)
10209 SDValue Sbc1 =
10210 DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT),
10211 Sub1Result, Sub1Result, Flags1);
10212 SDValue Sbc1Result = Sbc1.getValue(0);
10213
10214 // Second comparison: RHS vs LHS (reverse comparison)
10215 SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);
10216
10217 // SUBE: RHS - RHS - !carry
10218 // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned)
10219 SDValue Sbc2 = DAG.getNode(
10220 ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags);
10221 SDValue Sbc2Result = Sbc2.getValue(0);
10222
10223 // Final subtraction: Sbc1Result - Sbc2Result (no flags needed)
10224 SDValue Result =
10225 DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);
10226 if (Op.getValueType() != MVT::i32)
10227 Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType());
10228
10229 return Result;
10230 }
10231
10232 // For the ARM assembly pattern:
10233 // subs r0, r0, r1 ; subtract RHS from LHS and set flags
10234 // movgt r0, #1 ; if LHS > RHS, set result to 1 (GT for signed, HI for
10235 // unsigned) mvnlt r0, #0 ; if LHS < RHS, set result to -1 (LT for
10236 // signed, LO for unsigned)
10237 // ; if LHS == RHS, result remains 0 from the subs
10238
10239 // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC
10240 unsigned Opcode = ARMISD::SUBC;
10241
10242 // Check if RHS is a subtraction against 0: (0 - X)
10243 if (RHS.getOpcode() == ISD::SUB) {
10244 SDValue SubLHS = RHS.getOperand(0);
10245 SDValue SubRHS = RHS.getOperand(1);
10246
10247 // Check if it's 0 - X
10248 if (isNullConstant(SubLHS)) {
10249 bool CanUseAdd = false;
10250 if (IsSigned) {
10251 // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)
10252 if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
10254 .isMinSignedValue()) {
10255 CanUseAdd = true;
10256 }
10257 } else {
10258 // For UCMP: only if X is known to never be zero
10259 if (DAG.isKnownNeverZero(SubRHS)) {
10260 CanUseAdd = true;
10261 }
10262 }
10263
10264 if (CanUseAdd) {
10265 Opcode = ARMISD::ADDC;
10266 RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of
10267 // LHS - (0 - X)
10268 }
10269 }
10270 }
10271
10272 // Generate the operation with flags
10273 SDValue OpWithFlags =
10274 DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
10275
10276 SDValue OpResult = OpWithFlags.getValue(0);
10277 SDValue Flags = OpWithFlags.getValue(1);
10278
10279 // Constants for conditional moves
10280 SDValue One = DAG.getConstant(1, dl, MVT::i32);
10281 SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);
10282
10283 // Select condition codes based on signed vs unsigned
10284 ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;
10285 ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;
10286
10287 // First conditional move: if greater than, set to 1
10288 SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);
10289 SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
10290 GTCondValue, Flags);
10291
10292 // Second conditional move: if less than, set to -1
10293 SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);
10294 SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
10295 LTCondValue, Flags);
10296
10297 if (Op.getValueType() != MVT::i32)
10298 Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
10299
10300 return Result2;
10301}
10302
10304 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10305 switch (Op.getOpcode()) {
10306 default: llvm_unreachable("Don't know how to custom lower this!");
10307 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10308 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10309 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10310 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10311 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10312 case ISD::SELECT: return LowerSELECT(Op, DAG);
10313 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10314 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10315 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10316 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10317 case ISD::VASTART: return LowerVASTART(Op, DAG);
10318 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10319 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10320 case ISD::SINT_TO_FP:
10321 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10324 case ISD::FP_TO_SINT:
10325 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10327 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10328 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10329 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10330 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10331 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10332 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10333 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10334 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10335 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10336 Subtarget);
10337 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10338 case ISD::SHL:
10339 case ISD::SRL:
10340 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10341 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10342 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10343 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10344 case ISD::SRL_PARTS:
10345 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10346 case ISD::CTTZ:
10347 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10348 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10349 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10350 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10351 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10352 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10353 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10354 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10355 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10356 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10357 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10358 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10359 case ISD::SIGN_EXTEND:
10360 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10361 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10362 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10363 case ISD::SET_FPMODE:
10364 return LowerSET_FPMODE(Op, DAG);
10365 case ISD::RESET_FPMODE:
10366 return LowerRESET_FPMODE(Op, DAG);
10367 case ISD::MUL: return LowerMUL(Op, DAG);
10368 case ISD::SDIV:
10369 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10370 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10371 return LowerSDIV(Op, DAG, Subtarget);
10372 case ISD::UDIV:
10373 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10374 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10375 return LowerUDIV(Op, DAG, Subtarget);
10376 case ISD::UADDO_CARRY:
10377 case ISD::USUBO_CARRY:
10378 return LowerUADDSUBO_CARRY(Op, DAG);
10379 case ISD::SADDO:
10380 case ISD::SSUBO:
10381 return LowerSignedALUO(Op, DAG);
10382 case ISD::UADDO:
10383 case ISD::USUBO:
10384 return LowerUnsignedALUO(Op, DAG);
10385 case ISD::SADDSAT:
10386 case ISD::SSUBSAT:
10387 case ISD::UADDSAT:
10388 case ISD::USUBSAT:
10389 return LowerADDSUBSAT(Op, DAG, Subtarget);
10390 case ISD::LOAD:
10391 return LowerPredicateLoad(Op, DAG);
10392 case ISD::STORE:
10393 return LowerSTORE(Op, DAG, Subtarget);
10394 case ISD::MLOAD:
10395 return LowerMLOAD(Op, DAG);
10396 case ISD::VECREDUCE_MUL:
10397 case ISD::VECREDUCE_AND:
10398 case ISD::VECREDUCE_OR:
10399 case ISD::VECREDUCE_XOR:
10400 return LowerVecReduce(Op, DAG, Subtarget);
10401 case ISD::VECREDUCE_FADD:
10402 case ISD::VECREDUCE_FMUL:
10403 case ISD::VECREDUCE_FMIN:
10404 case ISD::VECREDUCE_FMAX:
10405 return LowerVecReduceF(Op, DAG, Subtarget);
10406 case ISD::VECREDUCE_UMIN:
10407 case ISD::VECREDUCE_UMAX:
10408 case ISD::VECREDUCE_SMIN:
10409 case ISD::VECREDUCE_SMAX:
10410 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10411 case ISD::ATOMIC_LOAD:
10412 case ISD::ATOMIC_STORE:
10413 return LowerAtomicLoadStore(Op, DAG);
10414 case ISD::SDIVREM:
10415 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10416 case ISD::DYNAMIC_STACKALLOC:
10417 if (Subtarget->isTargetWindows())
10418 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10419 llvm_unreachable("Don't know how to custom lower this!");
10421 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10423 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10424 case ISD::STRICT_FSETCC:
10425 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10426 case ISD::SPONENTRY:
10427 return LowerSPONENTRY(Op, DAG);
10428 case ISD::FP_TO_BF16:
10429 return LowerFP_TO_BF16(Op, DAG);
10430 case ARMISD::WIN__DBZCHK: return SDValue();
10431 case ISD::UCMP:
10432 case ISD::SCMP:
10433 return LowerCMP(Op, DAG);
10434 case ISD::ABS:
10435 return LowerABS(Op, DAG);
10436 case ISD::STRICT_LROUND:
10438 case ISD::STRICT_LRINT:
10439 case ISD::STRICT_LLRINT: {
10440 assert((Op.getOperand(1).getValueType() == MVT::f16 ||
10441 Op.getOperand(1).getValueType() == MVT::bf16) &&
10442 "Expected custom lowering of rounding operations only for f16");
10443 SDLoc DL(Op);
10444 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
10445 {Op.getOperand(0), Op.getOperand(1)});
10446 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
10447 {Ext.getValue(1), Ext.getValue(0)});
10448 }
10449 }
10450}
10451
// ReplaceLongIntrinsic - Expand the ARM DSP intrinsics that carry a 64-bit
// accumulator (arm_smlald / arm_smlaldx / arm_smlsld / arm_smlsldx) into the
// matching ARMISD node, which produces the result as two i32 values, and
// rebuild the i64 result with BUILD_PAIR.
// NOTE(review): the opening line of this signature (original line 10452) is
// missing from this extraction; the caller in ReplaceNodeResults invokes it
// as ReplaceLongIntrinsic(N, Results, DAG).
10453 SelectionDAG &DAG) {
// Operand 0 of the intrinsic node is the intrinsic ID.
10454 unsigned IntNo = N->getConstantOperandVal(0);
10455 unsigned Opc = 0;
10456 if (IntNo == Intrinsic::arm_smlald)
10457 Opc = ARMISD::SMLALD;
10458 else if (IntNo == Intrinsic::arm_smlaldx)
10459 Opc = ARMISD::SMLALDX;
10460 else if (IntNo == Intrinsic::arm_smlsld)
10461 Opc = ARMISD::SMLSLD;
10462 else if (IntNo == Intrinsic::arm_smlsldx)
10463 Opc = ARMISD::SMLSLDX;
10464 else
// Not one of the long DSP intrinsics: leave Results untouched.
10465 return;
10466
10467 SDLoc dl(N);
10468 SDValue Lo, Hi;
// Split the i64 accumulator (operand 3) into its two i32 halves.
10469 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10470
// The ARMISD node takes the two multiplicand operands plus the split
// accumulator and yields (lo, hi) as two i32 results.
10471 SDValue LongMul = DAG.getNode(Opc, dl,
10472 DAG.getVTList(MVT::i32, MVT::i32),
10473 N->getOperand(1), N->getOperand(2),
10474 Lo, Hi);
// Reassemble the two i32 results into the i64 the intrinsic returns.
10475 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10476 LongMul.getValue(0), LongMul.getValue(1)));
10477}
10478
10479/// ReplaceNodeResults - Replace the results of node with an illegal result
10480/// type with new values built out of custom code.
// NOTE(review): the embedded numbering jumps (10481-10482, 10489, 10525,
// 10527, 10539-10540) show that this extraction dropped several lines —
// including the two signature lines and, presumably, the expansion
// call/case-label text for READ_REGISTER, ATOMIC_CMP_SWAP, the intrinsic
// case, and the FP_TO_*_SAT case labels. Consult the original file before
// relying on this text.
10483 SelectionDAG &DAG) const {
// Cases either push expanded values directly into Results and return, or
// set Res, which is appended at the bottom when non-null.
10484 SDValue Res;
10485 switch (N->getOpcode()) {
10486 default:
10487 llvm_unreachable("Don't know how to custom expand this!");
10488 case ISD::READ_REGISTER:
10490 break;
10491 case ISD::BITCAST:
10492 Res = ExpandBITCAST(N, DAG, Subtarget);
10493 break;
// 64-bit shifts are expanded into target shift-pair sequences.
10494 case ISD::SRL:
10495 case ISD::SRA:
10496 case ISD::SHL:
10497 Res = Expand64BitShift(N, DAG, Subtarget);
10498 break;
10499 case ISD::SREM:
10500 case ISD::UREM:
10501 Res = LowerREM(N, DAG);
10502 break;
// DivRem yields two values (quotient, remainder); both are pushed here.
10503 case ISD::SDIVREM:
10504 case ISD::UDIVREM:
10505 Res = LowerDivRem(SDValue(N, 0), DAG);
10506 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10507 Results.push_back(Res.getValue(0));
10508 Results.push_back(Res.getValue(1));
10509 return;
10510 case ISD::SADDSAT:
10511 case ISD::SSUBSAT:
10512 case ISD::UADDSAT:
10513 case ISD::USUBSAT:
10514 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10515 break;
10516 case ISD::READCYCLECOUNTER:
10517 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10518 return;
// Integer division is only custom-expanded for Windows (__rt_*div calls).
10519 case ISD::UDIV:
10520 case ISD::SDIV:
10521 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10522 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10523 Results);
10524 case ISD::ATOMIC_CMP_SWAP:
10526 return;
10528 return ReplaceLongIntrinsic(N, Results, DAG);
10529 case ISD::LOAD:
10530 LowerLOAD(N, Results, DAG);
10531 break;
10532 case ISD::TRUNCATE:
10533 Res = LowerTruncate(N, DAG, Subtarget);
10534 break;
10535 case ISD::SIGN_EXTEND:
10536 case ISD::ZERO_EXTEND:
10537 Res = LowerVectorExtend(N, DAG, Subtarget);
10538 break;
10541 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10542 break;
10543 }
10544 if (Res.getNode())
10545 Results.push_back(Res);
10546}
10547
10548//===----------------------------------------------------------------------===//
10549// ARM Scheduler Hooks
10550//===----------------------------------------------------------------------===//
10551
10552/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10553/// registers the function context.
// NOTE(review): the embedded numbering jumps (10555, 10563-10565, 10573,
// 10582-10583, 10586-10587, and several builder-chain continuations) show
// dropped lines. In particular the body uses MRI, MCP and AFI whose
// declarations are not visible here, and the CPMMO/FIMMOSt initializers are
// truncated. Consult the original file before relying on this text.
10554void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10556 MachineBasicBlock *DispatchBB,
10557 int FI) const {
10558 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10559 "ROPI/RWPI not currently supported with SjLj");
10560 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10561 DebugLoc dl = MI.getDebugLoc();
10562 MachineFunction *MF = MBB->getParent();
10566 const Function &F = MF->getFunction();
10567
10568 bool isThumb = Subtarget->isThumb();
10569 bool isThumb2 = Subtarget->isThumb2();
10570
// A PIC label pairs the constant-pool entry with the pc-relative add below;
// PCAdj accounts for the pipeline offset (4 in Thumb modes, 8 in ARM mode).
10571 unsigned PCLabelId = AFI->createPICLabelUId();
10572 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10574 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10575 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10576
10577 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10578 : &ARM::GPRRegClass;
10579
10580 // Grab constant pool and fixed stack memory operands.
10581 MachineMemOperand *CPMMO =
10584
10585 MachineMemOperand *FIMMOSt =
10588
// All three branches below compute DispatchBB's address and store it into
// the jump buffer at offset 36 (&jbuf[1], the saved pc slot); the Thumb
// branches additionally set the low bit to mark a Thumb-mode target.
10589 // Load the address of the dispatch MBB into the jump buffer.
10590 if (isThumb2) {
10591 // Incoming value: jbuf
10592 // ldr.n r5, LCPI1_1
10593 // orr r5, r5, #1
10594 // add r5, pc
10595 // str r5, [$jbuf, #+4] ; &jbuf[1]
10596 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10597 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10599 .addMemOperand(CPMMO)
10601 // Set the low bit because of thumb mode.
10602 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10603 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10604 .addReg(NewVReg1, RegState::Kill)
10605 .addImm(0x01)
10607 .add(condCodeOp())
10608 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10609 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10610 .addReg(NewVReg2, RegState::Kill)
10611 .addImm(PCLabelId);
10612 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10613 .addReg(NewVReg3, RegState::Kill)
10614 .addFrameIndex(FI)
10615 .addImm(36) // &jbuf[1] :: pc
10616 .addMemOperand(FIMMOSt)
10618 } else if (isThumb) {
10619 // Incoming value: jbuf
10620 // ldr.n r1, LCPI1_4
10621 // add r1, pc
10622 // mov r2, #1
10623 // orrs r1, r2
10624 // add r2, $jbuf, #+4 ; &jbuf[1]
10625 // str r1, [r2]
10626 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10627 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10629 .addMemOperand(CPMMO)
10631 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10632 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10633 .addReg(NewVReg1, RegState::Kill)
10634 .addImm(PCLabelId);
10635 // Set the low bit because of thumb mode.
10636 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10637 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10638 .addReg(ARM::CPSR, RegState::Define)
10639 .addImm(1)
10641 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10642 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10643 .addReg(ARM::CPSR, RegState::Define)
10644 .addReg(NewVReg2, RegState::Kill)
10645 .addReg(NewVReg3, RegState::Kill)
// Thumb1 tSTRi has no frame-index form, so form the slot address first.
10647 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10648 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10649 .addFrameIndex(FI)
10650 .addImm(36); // &jbuf[1] :: pc
10651 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10652 .addReg(NewVReg4, RegState::Kill)
10653 .addReg(NewVReg5, RegState::Kill)
10654 .addImm(0)
10655 .addMemOperand(FIMMOSt)
10657 } else {
10658 // Incoming value: jbuf
10659 // ldr r1, LCPI1_1
10660 // add r1, pc, r1
10661 // str r1, [$jbuf, #+4] ; &jbuf[1]
10662 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10663 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10665 .addImm(0)
10666 .addMemOperand(CPMMO)
10668 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10669 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10670 .addReg(NewVReg1, RegState::Kill)
10671 .addImm(PCLabelId)
10673 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10674 .addReg(NewVReg2, RegState::Kill)
10675 .addFrameIndex(FI)
10676 .addImm(36) // &jbuf[1] :: pc
10677 .addMemOperand(FIMMOSt)
10679 }
10680}
10681
// EmitSjLjDispatchBlock - Build the SjLj exception dispatch machinery:
// collect every landing pad, build an inline jump table over them, emit a
// dispatch block that bounds-checks the call-site index stored in the
// function context and jumps through the table, and rewire all invoke
// blocks to target the single new dispatch pad.
// NOTE(review): numbering jumps (e.g. 10764-10765, 10777, 11019, 11040,
// 11045, 11065, plus many builder-chain continuations) show dropped lines:
// the FIMMOLd initializer is truncated, the register-mask add described by
// the comment at 10773-10776 is not visible, and MBBLPads is used below
// without a visible declaration. Consult the original file before relying
// on this text.
10682void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10683 MachineBasicBlock *MBB) const {
10684 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10685 DebugLoc dl = MI.getDebugLoc();
10686 MachineFunction *MF = MBB->getParent();
10687 MachineRegisterInfo *MRI = &MF->getRegInfo();
10688 MachineFrameInfo &MFI = MF->getFrameInfo();
10689 int FI = MFI.getFunctionContextIndex();
10690
10691 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10692 : &ARM::GPRnopcRegClass;
10693
10694 // Get a mapping of the call site numbers to all of the landing pads they're
10695 // associated with.
10696 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10697 unsigned MaxCSNum = 0;
10698 for (MachineBasicBlock &BB : *MF) {
10699 if (!BB.isEHPad())
10700 continue;
10701
10702 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10703 // pad.
10704 for (MachineInstr &II : BB) {
10705 if (!II.isEHLabel())
10706 continue;
10707
10708 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10709 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10710
10711 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10712 for (unsigned Idx : CallSiteIdxs) {
10713 CallSiteNumToLPad[Idx].push_back(&BB);
10714 MaxCSNum = std::max(MaxCSNum, Idx);
10715 }
// Only the first EH label of each pad is considered.
10716 break;
10717 }
10718 }
10719
10720 // Get an ordered list of the machine basic blocks for the jump table.
10721 std::vector<MachineBasicBlock*> LPadList;
10722 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10723 LPadList.reserve(CallSiteNumToLPad.size());
// Call-site numbers start at 1; iterate in order so the jump table is
// indexed by call-site number.
10724 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10725 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10726 for (MachineBasicBlock *MBB : MBBList) {
10727 LPadList.push_back(MBB);
10728 InvokeBBs.insert_range(MBB->predecessors());
10729 }
10730 }
10731
10732 assert(!LPadList.empty() &&
10733 "No landing pad destinations for the dispatch jump table!");
10734
10735 // Create the jump table and associated information.
10736 MachineJumpTableInfo *JTI =
10737 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10738 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10739
10740 // Create the MBBs for the dispatch code.
10741
10742 // Shove the dispatch's address into the return slot in the function context.
10743 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10744 DispatchBB->setIsEHPad();
10745
// TrapBB is reached when the call-site index is out of range.
10746 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10747
10748 BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
10749 DispatchBB->addSuccessor(TrapBB);
10750
10751 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10752 DispatchBB->addSuccessor(DispContBB);
10753
10754 // Insert and MBBs.
10755 MF->insert(MF->end(), DispatchBB);
10756 MF->insert(MF->end(), DispContBB);
10757 MF->insert(MF->end(), TrapBB);
10758
10759 // Insert code into the entry block that creates and registers the function
10760 // context.
10761 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10762
10763 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10766
10767 MachineInstrBuilder MIB;
10768 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10769
10770 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10771 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10772
10773 // Add a register mask with no preserved registers. This results in all
10774 // registers being marked as clobbered. This can't work if the dispatch block
10775 // is in a Thumb1 function and is linked with ARM code which uses the FP
10776 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
10778
// The three branches below emit the same logical sequence in Thumb2 /
// Thumb1 / ARM encodings: load the call-site index from the function
// context, compare it against the number of landing pads (branching to
// TrapBB if out of range), then index the jump table and branch.
10779 bool IsPositionIndependent = isPositionIndependent();
10780 unsigned NumLPads = LPadList.size();
10781 if (Subtarget->isThumb2()) {
10782 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10783 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
10784 .addFrameIndex(FI)
10785 .addImm(4)
10786 .addMemOperand(FIMMOLd)
10788
10789 if (NumLPads < 256) {
10790 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
10791 .addReg(NewVReg1)
10792 .addImm(LPadList.size())
10794 } else {
// Large pad counts need the bound materialized via movw/movt.
10795 Register VReg1 = MRI->createVirtualRegister(TRC);
10796 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
10797 .addImm(NumLPads & 0xFFFF)
10799
10800 unsigned VReg2 = VReg1;
10801 if ((NumLPads & 0xFFFF0000) != 0) {
10802 VReg2 = MRI->createVirtualRegister(TRC);
10803 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
10804 .addReg(VReg1)
10805 .addImm(NumLPads >> 16)
10807 }
10808
10809 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
10810 .addReg(NewVReg1)
10811 .addReg(VReg2)
10813 }
10814
10815 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
10816 .addMBB(TrapBB)
10818 .addReg(ARM::CPSR);
10819
10820 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10821 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
10822 .addJumpTableIndex(MJTI)
10824
10825 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10826 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
10827 .addReg(NewVReg3, RegState::Kill)
10828 .addReg(NewVReg1)
10831 .add(condCodeOp());
10832
10833 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
10834 .addReg(NewVReg4, RegState::Kill)
10835 .addReg(NewVReg1)
10836 .addJumpTableIndex(MJTI);
10837 } else if (Subtarget->isThumb()) {
10838 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10839 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
10840 .addFrameIndex(FI)
10841 .addImm(1)
10842 .addMemOperand(FIMMOLd)
10844
10845 if (NumLPads < 256) {
10846 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
10847 .addReg(NewVReg1)
10848 .addImm(NumLPads)
10850 } else {
// Thumb1 has no movw/movt, so large bounds come from the constant pool.
10851 MachineConstantPool *ConstantPool = MF->getConstantPool();
10852 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10853 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10854
10855 // MachineConstantPool wants an explicit alignment.
10856 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10857 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10858
10859 Register VReg1 = MRI->createVirtualRegister(TRC);
10860 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
10861 .addReg(VReg1, RegState::Define)
10864 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
10865 .addReg(NewVReg1)
10866 .addReg(VReg1)
10868 }
10869
10870 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
10871 .addMBB(TrapBB)
10873 .addReg(ARM::CPSR);
10874
// Scale the index by 4 (entries are 32-bit) before adding the table base.
10875 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10876 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
10877 .addReg(ARM::CPSR, RegState::Define)
10878 .addReg(NewVReg1)
10879 .addImm(2)
10881
10882 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10883 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
10884 .addJumpTableIndex(MJTI)
10886
10887 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10888 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
10889 .addReg(ARM::CPSR, RegState::Define)
10890 .addReg(NewVReg2, RegState::Kill)
10891 .addReg(NewVReg3)
10893
10894 MachineMemOperand *JTMMOLd =
10895 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
10897
10898 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10899 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
10900 .addReg(NewVReg4, RegState::Kill)
10901 .addImm(0)
10902 .addMemOperand(JTMMOLd)
10904
// PIC jump tables hold offsets, so add the table base back in.
10905 unsigned NewVReg6 = NewVReg5;
10906 if (IsPositionIndependent) {
10907 NewVReg6 = MRI->createVirtualRegister(TRC);
10908 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
10909 .addReg(ARM::CPSR, RegState::Define)
10910 .addReg(NewVReg5, RegState::Kill)
10911 .addReg(NewVReg3)
10913 }
10914
10915 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
10916 .addReg(NewVReg6, RegState::Kill)
10917 .addJumpTableIndex(MJTI);
10918 } else {
10919 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10920 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
10921 .addFrameIndex(FI)
10922 .addImm(4)
10923 .addMemOperand(FIMMOLd)
10925
10926 if (NumLPads < 256) {
10927 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
10928 .addReg(NewVReg1)
10929 .addImm(NumLPads)
10931 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
10932 Register VReg1 = MRI->createVirtualRegister(TRC);
10933 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
10934 .addImm(NumLPads & 0xFFFF)
10936
10937 unsigned VReg2 = VReg1;
10938 if ((NumLPads & 0xFFFF0000) != 0) {
10939 VReg2 = MRI->createVirtualRegister(TRC);
10940 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
10941 .addReg(VReg1)
10942 .addImm(NumLPads >> 16)
10944 }
10945
10946 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10947 .addReg(NewVReg1)
10948 .addReg(VReg2)
10950 } else {
10951 MachineConstantPool *ConstantPool = MF->getConstantPool();
10952 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10953 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10954
10955 // MachineConstantPool wants an explicit alignment.
10956 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10957 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10958
10959 Register VReg1 = MRI->createVirtualRegister(TRC);
10960 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
10961 .addReg(VReg1, RegState::Define)
10963 .addImm(0)
10965 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10966 .addReg(NewVReg1)
10967 .addReg(VReg1, RegState::Kill)
10969 }
10970
10971 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
10972 .addMBB(TrapBB)
10974 .addReg(ARM::CPSR);
10975
10976 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10977 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
10978 .addReg(NewVReg1)
10981 .add(condCodeOp());
10982 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10983 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
10984 .addJumpTableIndex(MJTI)
10986
10987 MachineMemOperand *JTMMOLd =
10988 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
10990 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10991 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
10992 .addReg(NewVReg3, RegState::Kill)
10993 .addReg(NewVReg4)
10994 .addImm(0)
10995 .addMemOperand(JTMMOLd)
10997
10998 if (IsPositionIndependent) {
10999 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11000 .addReg(NewVReg5, RegState::Kill)
11001 .addReg(NewVReg4)
11002 .addJumpTableIndex(MJTI);
11003 } else {
11004 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11005 .addReg(NewVReg5, RegState::Kill)
11006 .addJumpTableIndex(MJTI);
11007 }
11008 }
11009
11010 // Add the jump table entries as successors to the MBB.
11011 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11012 for (MachineBasicBlock *CurMBB : LPadList) {
11013 if (SeenMBBs.insert(CurMBB).second)
11014 DispContBB->addSuccessor(CurMBB);
11015 }
11016
11017 // N.B. the order the invoke BBs are processed in doesn't matter here.
11018 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11020 for (MachineBasicBlock *BB : InvokeBBs) {
11021
11022 // Remove the landing pad successor from the invoke block and replace it
11023 // with the new dispatch block.
11024 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11025 while (!Successors.empty()) {
11026 MachineBasicBlock *SMBB = Successors.pop_back_val();
11027 if (SMBB->isEHPad()) {
11028 BB->removeSuccessor(SMBB);
11029 MBBLPads.push_back(SMBB);
11030 }
11031 }
11032
11033 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11034 BB->normalizeSuccProbs();
11035
11036 // Find the invoke call and mark all of the callee-saved registers as
11037 // 'implicit defined' so that they're spilled. This prevents code from
11038 // moving instructions to before the EH block, where they will never be
11039 // executed.
11041 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11042 if (!II->isCall()) continue;
11043
11044 DenseSet<unsigned> DefRegs;
11046 OI = II->operands_begin(), OE = II->operands_end();
11047 OI != OE; ++OI) {
11048 if (!OI->isReg()) continue;
11049 DefRegs.insert(OI->getReg());
11050 }
11051
11052 MachineInstrBuilder MIB(*MF, &*II);
11053
// SavedRegs is a null-terminated list; skip registers the current ISA
// cannot use, and registers the call already mentions.
11054 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11055 unsigned Reg = SavedRegs[i];
11056 if (Subtarget->isThumb2() &&
11057 !ARM::tGPRRegClass.contains(Reg) &&
11058 !ARM::hGPRRegClass.contains(Reg))
11059 continue;
11060 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11061 continue;
11062 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11063 continue;
11064 if (!DefRegs.contains(Reg))
11066 }
11067
// Only the last call in the block needs the implicit defs.
11068 break;
11069 }
11070 }
11071
11072 // Mark all former landing pads as non-landing pads. The dispatch is the only
11073 // landing pad now.
11074 for (MachineBasicBlock *MBBLPad : MBBLPads)
11075 MBBLPad->setIsEHPad(false);
11076
11077 // The instruction is gone now.
11078 MI.eraseFromParent();
11079}
11080
// NOTE(review): the line carrying this helper's name and parameters
// (original line 11082) is missing from this extraction. Judging by the
// body and the unreachable message, it takes a block with exactly two
// successors plus one of them (Succ) and returns the other successor.
11081static
11083 for (MachineBasicBlock *S : MBB->successors())
11084 if (S != Succ)
11085 return S;
// Reaching here means MBB did not have a successor other than Succ.
11086 llvm_unreachable("Expecting a BB with two successors!");
11087}
11088
11089/// Return the load opcode for a given load size. If load size >= 8,
11090/// neon opcode will be returned.
11091static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11092 if (LdSize >= 8)
11093 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11094 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11095 if (IsThumb1)
11096 return LdSize == 4 ? ARM::tLDRi
11097 : LdSize == 2 ? ARM::tLDRHi
11098 : LdSize == 1 ? ARM::tLDRBi : 0;
11099 if (IsThumb2)
11100 return LdSize == 4 ? ARM::t2LDR_POST
11101 : LdSize == 2 ? ARM::t2LDRH_POST
11102 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11103 return LdSize == 4 ? ARM::LDR_POST_IMM
11104 : LdSize == 2 ? ARM::LDRH_POST
11105 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11106}
11107
11108/// Return the store opcode for a given store size. If store size >= 8,
11109/// neon opcode will be returned.
11110static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11111 if (StSize >= 8)
11112 return StSize == 16 ? ARM::VST1q32wb_fixed
11113 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11114 if (IsThumb1)
11115 return StSize == 4 ? ARM::tSTRi
11116 : StSize == 2 ? ARM::tSTRHi
11117 : StSize == 1 ? ARM::tSTRBi : 0;
11118 if (IsThumb2)
11119 return StSize == 4 ? ARM::t2STR_POST
11120 : StSize == 2 ? ARM::t2STRH_POST
11121 : StSize == 1 ? ARM::t2STRB_POST : 0;
11122 return StSize == 4 ? ARM::STR_POST_IMM
11123 : StSize == 2 ? ARM::STRH_POST
11124 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11125}
11126
11127/// Emit a post-increment load operation with given size. The instructions
11128/// will be added to BB at Pos.
// NOTE(review): the first signature line (original line 11129, carrying the
// function name and the BB/Pos parameters) and several builder-chain
// continuation lines (11140, 11146, 11151, 11157, 11164) are missing from
// this extraction; the callers in EmitStructByval invoke it as
// emitPostLd(BB, Pos, TII, dl, Size, Data, AddrIn, AddrOut, IsThumb1,
// IsThumb2). Consult the original file before relying on this text.
11130 const TargetInstrInfo *TII, const DebugLoc &dl,
11131 unsigned LdSize, unsigned Data, unsigned AddrIn,
11132 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11133 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11134 assert(LdOpc != 0 && "Should have a load opcode");
// NEON load with writeback: AddrOut is defined by the instruction itself.
11135 if (LdSize >= 8) {
11136 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11137 .addReg(AddrOut, RegState::Define)
11138 .addReg(AddrIn)
11139 .addImm(0)
11141 } else if (IsThumb1) {
11142 // load + update AddrIn
11143 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11144 .addReg(AddrIn)
11145 .addImm(0)
// Thumb1 has no post-indexed form: bump the address with a separate add.
11147 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11148 .add(t1CondCodeOp())
11149 .addReg(AddrIn)
11150 .addImm(LdSize)
11152 } else if (IsThumb2) {
// Thumb2 post-indexed load: one instruction loads and advances the address.
11153 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11154 .addReg(AddrOut, RegState::Define)
11155 .addReg(AddrIn)
11156 .addImm(LdSize)
11158 } else { // arm
// ARM post-indexed load; the extra reg operand 0 is the (absent) offset reg.
11159 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11160 .addReg(AddrOut, RegState::Define)
11161 .addReg(AddrIn)
11162 .addReg(0)
11163 .addImm(LdSize)
11165 }
11166}
11167
11168/// Emit a post-increment store operation with given size. The instructions
11169/// will be added to BB at Pos.
// NOTE(review): the first signature line (original line 11170, carrying the
// function name and the BB/Pos parameters) and several builder-chain
// continuation lines (11181, 11188, 11193, 11199, 11206) are missing from
// this extraction; the callers in EmitStructByval invoke it as
// emitPostSt(BB, Pos, TII, dl, Size, Data, AddrIn, AddrOut, IsThumb1,
// IsThumb2). Consult the original file before relying on this text.
11171 const TargetInstrInfo *TII, const DebugLoc &dl,
11172 unsigned StSize, unsigned Data, unsigned AddrIn,
11173 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11174 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11175 assert(StOpc != 0 && "Should have a store opcode");
// NEON store with writeback: the updated address is the def result.
11176 if (StSize >= 8) {
11177 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11178 .addReg(AddrIn)
11179 .addImm(0)
11180 .addReg(Data)
11182 } else if (IsThumb1) {
11183 // store + update AddrIn
11184 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11185 .addReg(Data)
11186 .addReg(AddrIn)
11187 .addImm(0)
// Thumb1 has no post-indexed form: bump the address with a separate add.
11189 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11190 .add(t1CondCodeOp())
11191 .addReg(AddrIn)
11192 .addImm(StSize)
11194 } else if (IsThumb2) {
// Thumb2 post-indexed store: one instruction stores and advances the address.
11195 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11196 .addReg(Data)
11197 .addReg(AddrIn)
11198 .addImm(StSize)
11200 } else { // arm
// ARM post-indexed store; the extra reg operand 0 is the (absent) offset reg.
11201 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11202 .addReg(Data)
11203 .addReg(AddrIn)
11204 .addReg(0)
11205 .addImm(StSize)
11207 }
11208}
11209
11211ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11212 MachineBasicBlock *BB) const {
11213 // This pseudo instruction has 3 operands: dst, src, size
11214 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11215 // Otherwise, we will generate unrolled scalar copies.
11216 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11217 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11219
11220 Register dest = MI.getOperand(0).getReg();
11221 Register src = MI.getOperand(1).getReg();
11222 unsigned SizeVal = MI.getOperand(2).getImm();
11223 unsigned Alignment = MI.getOperand(3).getImm();
11224 DebugLoc dl = MI.getDebugLoc();
11225
11226 MachineFunction *MF = BB->getParent();
11227 MachineRegisterInfo &MRI = MF->getRegInfo();
11228 unsigned UnitSize = 0;
11229 const TargetRegisterClass *TRC = nullptr;
11230 const TargetRegisterClass *VecTRC = nullptr;
11231
11232 bool IsThumb1 = Subtarget->isThumb1Only();
11233 bool IsThumb2 = Subtarget->isThumb2();
11234 bool IsThumb = Subtarget->isThumb();
11235
11236 if (Alignment & 1) {
11237 UnitSize = 1;
11238 } else if (Alignment & 2) {
11239 UnitSize = 2;
11240 } else {
11241 // Check whether we can use NEON instructions.
11242 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11243 Subtarget->hasNEON()) {
11244 if ((Alignment % 16 == 0) && SizeVal >= 16)
11245 UnitSize = 16;
11246 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11247 UnitSize = 8;
11248 }
11249 // Can't use NEON instructions.
11250 if (UnitSize == 0)
11251 UnitSize = 4;
11252 }
11253
11254 // Select the correct opcode and register class for unit size load/store
11255 bool IsNeon = UnitSize >= 8;
11256 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11257 if (IsNeon)
11258 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11259 : UnitSize == 8 ? &ARM::DPRRegClass
11260 : nullptr;
11261
11262 unsigned BytesLeft = SizeVal % UnitSize;
11263 unsigned LoopSize = SizeVal - BytesLeft;
11264
11265 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11266 // Use LDR and STR to copy.
11267 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11268 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11269 unsigned srcIn = src;
11270 unsigned destIn = dest;
11271 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11272 Register srcOut = MRI.createVirtualRegister(TRC);
11273 Register destOut = MRI.createVirtualRegister(TRC);
11274 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11275 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11276 IsThumb1, IsThumb2);
11277 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11278 IsThumb1, IsThumb2);
11279 srcIn = srcOut;
11280 destIn = destOut;
11281 }
11282
11283 // Handle the leftover bytes with LDRB and STRB.
11284 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11285 // [destOut] = STRB_POST(scratch, destIn, 1)
11286 for (unsigned i = 0; i < BytesLeft; i++) {
11287 Register srcOut = MRI.createVirtualRegister(TRC);
11288 Register destOut = MRI.createVirtualRegister(TRC);
11289 Register scratch = MRI.createVirtualRegister(TRC);
11290 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11291 IsThumb1, IsThumb2);
11292 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11293 IsThumb1, IsThumb2);
11294 srcIn = srcOut;
11295 destIn = destOut;
11296 }
11297 MI.eraseFromParent(); // The instruction is gone now.
11298 return BB;
11299 }
11300
11301 // Expand the pseudo op to a loop.
11302 // thisMBB:
11303 // ...
11304 // movw varEnd, # --> with thumb2
11305 // movt varEnd, #
11306 // ldrcp varEnd, idx --> without thumb2
11307 // fallthrough --> loopMBB
11308 // loopMBB:
11309 // PHI varPhi, varEnd, varLoop
11310 // PHI srcPhi, src, srcLoop
11311 // PHI destPhi, dst, destLoop
11312 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11313 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11314 // subs varLoop, varPhi, #UnitSize
11315 // bne loopMBB
11316 // fallthrough --> exitMBB
11317 // exitMBB:
11318 // epilogue to handle left-over bytes
11319 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11320 // [destOut] = STRB_POST(scratch, destLoop, 1)
11321 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11322 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11323 MF->insert(It, loopMBB);
11324 MF->insert(It, exitMBB);
11325
11326 // Set the call frame size on entry to the new basic blocks.
11327 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11328 loopMBB->setCallFrameSize(CallFrameSize);
11329 exitMBB->setCallFrameSize(CallFrameSize);
11330
11331 // Transfer the remainder of BB and its successor edges to exitMBB.
11332 exitMBB->splice(exitMBB->begin(), BB,
11333 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11335
11336 // Load an immediate to varEnd.
11337 Register varEnd = MRI.createVirtualRegister(TRC);
11338 if (Subtarget->useMovt()) {
11339 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11340 varEnd)
11341 .addImm(LoopSize);
11342 } else if (Subtarget->genExecuteOnly()) {
11343 assert(IsThumb && "Non-thumb expected to have used movt");
11344 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11345 } else {
11346 MachineConstantPool *ConstantPool = MF->getConstantPool();
11348 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11349
11350 // MachineConstantPool wants an explicit alignment.
11351 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11352 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11353 MachineMemOperand *CPMMO =
11356
11357 if (IsThumb)
11358 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11359 .addReg(varEnd, RegState::Define)
11362 .addMemOperand(CPMMO);
11363 else
11364 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11365 .addReg(varEnd, RegState::Define)
11367 .addImm(0)
11369 .addMemOperand(CPMMO);
11370 }
11371 BB->addSuccessor(loopMBB);
11372
11373 // Generate the loop body:
11374 // varPhi = PHI(varLoop, varEnd)
11375 // srcPhi = PHI(srcLoop, src)
11376 // destPhi = PHI(destLoop, dst)
11377 MachineBasicBlock *entryBB = BB;
11378 BB = loopMBB;
11379 Register varLoop = MRI.createVirtualRegister(TRC);
11380 Register varPhi = MRI.createVirtualRegister(TRC);
11381 Register srcLoop = MRI.createVirtualRegister(TRC);
11382 Register srcPhi = MRI.createVirtualRegister(TRC);
11383 Register destLoop = MRI.createVirtualRegister(TRC);
11384 Register destPhi = MRI.createVirtualRegister(TRC);
11385
11386 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11387 .addReg(varLoop).addMBB(loopMBB)
11388 .addReg(varEnd).addMBB(entryBB);
11389 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11390 .addReg(srcLoop).addMBB(loopMBB)
11391 .addReg(src).addMBB(entryBB);
11392 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11393 .addReg(destLoop).addMBB(loopMBB)
11394 .addReg(dest).addMBB(entryBB);
11395
11396 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11397 // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
11398 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11399 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11400 IsThumb1, IsThumb2);
11401 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11402 IsThumb1, IsThumb2);
11403
11404 // Decrement loop variable by UnitSize.
11405 if (IsThumb1) {
11406 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11407 .add(t1CondCodeOp())
11408 .addReg(varPhi)
11409 .addImm(UnitSize)
11411 } else {
11412 MachineInstrBuilder MIB =
11413 BuildMI(*BB, BB->end(), dl,
11414 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11415 MIB.addReg(varPhi)
11416 .addImm(UnitSize)
11418 .add(condCodeOp());
11419 MIB->getOperand(5).setReg(ARM::CPSR);
11420 MIB->getOperand(5).setIsDef(true);
11421 }
11422 BuildMI(*BB, BB->end(), dl,
11423 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11424 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11425
11426 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11427 BB->addSuccessor(loopMBB);
11428 BB->addSuccessor(exitMBB);
11429
11430 // Add epilogue to handle BytesLeft.
11431 BB = exitMBB;
11432 auto StartOfExit = exitMBB->begin();
11433
11434 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11435 // [destOut] = STRB_POST(scratch, destLoop, 1)
11436 unsigned srcIn = srcLoop;
11437 unsigned destIn = destLoop;
11438 for (unsigned i = 0; i < BytesLeft; i++) {
11439 Register srcOut = MRI.createVirtualRegister(TRC);
11440 Register destOut = MRI.createVirtualRegister(TRC);
11441 Register scratch = MRI.createVirtualRegister(TRC);
11442 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11443 IsThumb1, IsThumb2);
11444 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11445 IsThumb1, IsThumb2);
11446 srcIn = srcOut;
11447 destIn = destOut;
11448 }
11449
11450 MI.eraseFromParent(); // The instruction is gone now.
11451 return BB;
11452}
11453
 11455ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
 11456 MachineBasicBlock *MBB) const {
 // Lower the WIN__CHKSTK pseudo: call Windows' __chkstk stack probe and then
 // subtract the returned byte adjustment from SP.
 11457 const TargetMachine &TM = getTargetMachine();
 11458 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
 11459 DebugLoc DL = MI.getDebugLoc();
 11460
 11461 assert(Subtarget->isTargetWindows() &&
 11462 "__chkstk is only supported on Windows");
 11463 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
 11464
 11465 // __chkstk takes the number of words to allocate on the stack in R4, and
 11466 // returns the stack adjustment in number of bytes in R4. This will not
 11467 // clobber any other registers (other than the obvious lr).
 11468 //
 11469 // Although, technically, IP should be considered a register which may be
 11470 // clobbered, the call itself will not touch it. Windows on ARM is a pure
 11471 // thumb-2 environment, so there is no interworking required. As a result, we
 11472 // do not expect a veneer to be emitted by the linker, clobbering IP.
 11473 //
 11474 // Each module receives its own copy of __chkstk, so no import thunk is
 11475 // required, again, ensuring that IP is not clobbered.
 11476 //
 11477 // Finally, although some linkers may theoretically provide a trampoline for
 11478 // out of range calls (which is quite common due to a 32M range limitation of
 11479 // branches for Thumb), we can generate the long-call version via
 11480 // -mcmodel=large, alleviating the need for the trampoline which may clobber
 11481 // IP.
 11482
 // Select the call form based on the code model: a direct tBL for small-ish
 // models, or a materialized address plus indirect call for CodeModel::Large.
 // NOTE(review): some register-state flag operands of the BuildMI chains are
 // elided in this listing — verify against the upstream source.
 11483 switch (TM.getCodeModel()) {
 11484 case CodeModel::Tiny:
 11485 llvm_unreachable("Tiny code model not available on ARM.");
 11486 case CodeModel::Small:
 11487 case CodeModel::Medium:
 11488 case CodeModel::Kernel:
 11489 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
 11491 .addExternalSymbol("__chkstk")
 11494 .addReg(ARM::R12,
 11496 .addReg(ARM::CPSR,
 11498 break;
 11499 case CodeModel::Large: {
 11500 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 11501 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11502
 // Materialize the address of __chkstk into a register for the long call.
 11503 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
 11504 .addExternalSymbol("__chkstk");
 11510 .addReg(ARM::R12,
 11512 .addReg(ARM::CPSR,
 11514 break;
 11515 }
 11516 }
 11517
 // SP -= R4, where R4 now holds the byte adjustment returned by __chkstk.
 11518 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
 11519 .addReg(ARM::SP, RegState::Kill)
 11520 .addReg(ARM::R4, RegState::Kill)
 11523 .add(condCodeOp());
 11524
 11525 MI.eraseFromParent();
 11526 return MBB;
 11527}
11528
 11530ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
 11531 MachineBasicBlock *MBB) const {
 // Lower the WIN__DBZCHK pseudo: compare the divisor (operand 0) against
 // zero and, if it is zero, branch to a new block that raises the Windows
 // integer-divide-by-zero trap (t__brkdiv0). Otherwise execution continues
 // in ContBB, which receives the remainder of the original block.
 11532 DebugLoc DL = MI.getDebugLoc();
 11533 MachineFunction *MF = MBB->getParent();
 11534 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
 11535
 // ContBB takes over all instructions after the pseudo.
 11536 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
 11537 MF->insert(++MBB->getIterator(), ContBB);
 11538 ContBB->splice(ContBB->begin(), MBB,
 11539 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
 11541 MBB->addSuccessor(ContBB);
 11542
 // TrapBB contains only the break instruction; it does not return.
 11543 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
 11544 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
 11545 MF->push_back(TrapBB);
 11546 MBB->addSuccessor(TrapBB);
 11547
 // Compare the divisor with zero and conditionally branch to the trap.
 // NOTE(review): the predicate operands of these BuildMI chains are elided
 // in this listing — verify against the upstream source.
 11548 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
 11549 .addReg(MI.getOperand(0).getReg())
 11550 .addImm(0)
 11552 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
 11553 .addMBB(TrapBB)
 11555 .addReg(ARM::CPSR);
 11556
 11557 MI.eraseFromParent();
 11558 return ContBB;
 11559}
11560
 11561// The CPSR operand of SelectItr might be missing a kill marker
 11562// because there were multiple uses of CPSR, and ISel didn't know
 11563// which to mark. Figure out whether SelectItr should have had a
 11564// kill marker, and set it if it should. Returns the correct kill
 11565// marker value: true iff a kill flag was added to SelectItr.
 11568 const TargetRegisterInfo* TRI) {
 11569 // Scan forward through BB for a use/def of CPSR.
 11570 MachineBasicBlock::iterator miI(std::next(SelectItr));
 11571 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
 11572 const MachineInstr& mi = *miI;
 // A later read means CPSR is still live here, so no kill flag is correct.
 11573 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
 11574 return false;
 11575 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
 11576 break; // Should have kill-flag - update below.
 11577 }
 11578
 11579 // If we hit the end of the block, check whether CPSR is live into a
 11580 // successor.
 11581 if (miI == BB->end()) {
 11582 for (MachineBasicBlock *Succ : BB->successors())
 11583 if (Succ->isLiveIn(ARM::CPSR))
 11584 return false;
 11585 }
 11586
 11587 // We found a def, or hit the end of the basic block and CPSR wasn't live
 11588 // out. SelectMI should have a kill flag on CPSR.
 11589 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
 11590 return true;
 11591}
11592
 11593/// Adds logic in loop entry MBB to calculate loop iteration count and adds
 11594/// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop. Returns the
 11595/// virtual register holding the total iteration count.
 11596 MachineBasicBlock *TpLoopBody,
 11597 MachineBasicBlock *TpExit, Register OpSizeReg,
 11598 const TargetInstrInfo *TII, DebugLoc Dl,
 11600 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
 11601 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11602 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
 11603 .addUse(OpSizeReg)
 11604 .addImm(15)
 11606 .addReg(0);
 11607
 11608 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11609 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
 11610 .addUse(AddDestReg, RegState::Kill)
 11611 .addImm(4)
 11613 .addReg(0);
 11614
 // The iteration count lives in the LR-capable register class required by
 // the low-overhead-loop instructions.
 11615 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
 11616 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
 11617 .addUse(LsrDestReg, RegState::Kill);
 11618
 // t2WhileLoopStart branches to TpExit when the count is zero.
 11619 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
 11620 .addUse(TotalIterationsReg)
 11621 .addMBB(TpExit);
 11622
 11623 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
 11624 .addMBB(TpLoopBody)
 11626
 11627 return TotalIterationsReg;
 11628}
11629
 11630/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
 11631/// t2DoLoopEnd. These are used by later passes to generate tail predicated
 11632/// loops. When IsMemcpy is true a predicated VLDRB feeds the VSTRB; for a
 11633/// memset, OpSrcReg already holds the vector value to store.
 11633static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
 11634 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
 11635 const TargetInstrInfo *TII, DebugLoc Dl,
 11636 MachineRegisterInfo &MRI, Register OpSrcReg,
 11637 Register OpDestReg, Register ElementCountReg,
 11638 Register TotalIterationsReg, bool IsMemcpy) {
 11639 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
 11640 // array, loop iteration counter, predication counter.
 11641
 11642 Register SrcPhiReg, CurrSrcReg;
 11643 if (IsMemcpy) {
 11644 // Current position in the src array
 11645 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11646 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11647 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
 11648 .addUse(OpSrcReg)
 11649 .addMBB(TpEntry)
 11650 .addUse(CurrSrcReg)
 11651 .addMBB(TpLoopBody);
 11652 }
 11653
 11654 // Current position in the dest array
 11655 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11656 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11657 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
 11658 .addUse(OpDestReg)
 11659 .addMBB(TpEntry)
 11660 .addUse(CurrDestReg)
 11661 .addMBB(TpLoopBody);
 11662
 11663 // Current loop counter
 11664 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
 11665 Register RemainingLoopIterationsReg =
 11666 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
 11667 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
 11668 .addUse(TotalIterationsReg)
 11669 .addMBB(TpEntry)
 11670 .addUse(RemainingLoopIterationsReg)
 11671 .addMBB(TpLoopBody);
 11672
 11673 // Predication counter
 11674 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11675 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
 11676 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
 11677 .addUse(ElementCountReg)
 11678 .addMBB(TpEntry)
 11679 .addUse(RemainingElementsReg)
 11680 .addMBB(TpLoopBody);
 11681
 11682 // Pass predication counter to VCTP
 11683 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
 11684 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
 11685 .addUse(PredCounterPhiReg)
 11687 .addReg(0)
 11688 .addReg(0);
 11689
 // RemainingElements = PredCounterPhi - 16 (16 byte lanes consumed per
 // iteration; the VCTP above handles the final partial vector).
 11690 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
 11691 .addUse(PredCounterPhiReg)
 11692 .addImm(16)
 11694 .addReg(0);
 11695
 11696 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
 11697 Register SrcValueReg;
 11698 if (IsMemcpy) {
 11699 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
 11700 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
 11701 .addDef(CurrSrcReg)
 11702 .addDef(SrcValueReg)
 11703 .addReg(SrcPhiReg)
 11704 .addImm(16)
 11706 .addUse(VccrReg)
 11707 .addReg(0);
 11708 } else
 11709 SrcValueReg = OpSrcReg;
 11710
 11711 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
 11712 .addDef(CurrDestReg)
 11713 .addUse(SrcValueReg)
 11714 .addReg(DestPhiReg)
 11715 .addImm(16)
 11717 .addUse(VccrReg)
 11718 .addReg(0);
 11719
 11720 // Add the pseudoInstrs for decrementing the loop counter and marking the
 11721 // end:t2DoLoopDec and t2DoLoopEnd
 11722 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
 11723 .addUse(LoopCounterPhiReg)
 11724 .addImm(1);
 11725
 // t2LoopEnd branches back to the loop body while iterations remain.
 11726 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
 11727 .addUse(RemainingLoopIterationsReg)
 11728 .addMBB(TpLoopBody);
 11729
 11730 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
 11731 .addMBB(TpExit)
 11733}
11734
 11736 // KCFI is supported in all ARM/Thumb modes
 // NOTE(review): the enclosing function signature is outside this view;
 // presumably this is the KCFI-support query hook — confirm against the
 // ARMTargetLowering class declaration.
 11737 return true;
 11738}
11739
 11743 const TargetInstrInfo *TII) const {
 // Emit a KCFI (kernel control-flow-integrity) type check before an indirect
 // call. The check pseudo receives the call-target register and the expected
 // CFI type hash recorded on the call instruction.
 11744 assert(MBBI->isCall() && MBBI->getCFIType() &&
 11745 "Invalid call instruction for a KCFI check");
 11746
 // Locate the operand holding the call-target register; its index depends on
 // the opcode's operand layout.
 11747 MachineOperand *TargetOp = nullptr;
 11748 switch (MBBI->getOpcode()) {
 11749 // ARM mode opcodes
 11750 case ARM::BLX:
 11751 case ARM::BLX_pred:
 11752 case ARM::BLX_noip:
 11753 case ARM::BLX_pred_noip:
 11754 case ARM::BX_CALL:
 11755 TargetOp = &MBBI->getOperand(0);
 11756 break;
 11757 case ARM::TCRETURNri:
 11758 case ARM::TCRETURNrinotr12:
 11759 case ARM::TAILJMPr:
 11760 case ARM::TAILJMPr4:
 11761 TargetOp = &MBBI->getOperand(0);
 11762 break;
 11763 // Thumb mode opcodes (Thumb1 and Thumb2)
 11764 // Note: Most Thumb call instructions have predicate operands before the
 11765 // target register Format: tBLXr pred, predreg, target_register, ...
 11766 case ARM::tBLXr: // Thumb1/Thumb2: BLX register (requires V5T)
 11767 case ARM::tBLXr_noip: // Thumb1/Thumb2: BLX register, no IP clobber
 11768 case ARM::tBX_CALL: // Thumb1 only: BX call (push LR, BX)
 11769 TargetOp = &MBBI->getOperand(2);
 11770 break;
 11771 // Tail call instructions don't have predicates, target is operand 0
 11772 case ARM::tTAILJMPr: // Thumb1/Thumb2: Tail call via register
 11773 TargetOp = &MBBI->getOperand(0);
 11774 break;
 11775 default:
 11776 llvm_unreachable("Unexpected CFI call opcode");
 11777 }
 11778
 11779 assert(TargetOp && TargetOp->isReg() && "Invalid target operand");
 // The target register must stay fixed between the check and the call.
 11780 TargetOp->setIsRenamable(false);
 11781
 11782 // Select the appropriate KCFI_CHECK variant based on the instruction set
 11783 unsigned KCFICheckOpcode;
 11784 if (Subtarget->isThumb()) {
 11785 if (Subtarget->isThumb2()) {
 11786 KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2;
 11787 } else {
 11788 KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1;
 11789 }
 11790 } else {
 11791 KCFICheckOpcode = ARM::KCFI_CHECK_ARM;
 11792 }
 11793
 // Insert the check immediately before the call instruction.
 11794 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(KCFICheckOpcode))
 11795 .addReg(TargetOp->getReg())
 11796 .addImm(MBBI->getCFIType())
 11797 .getInstr();
 11798}
11799
 11802 MachineBasicBlock *BB) const {
 // Expand pseudo-instructions that were marked usesCustomInserter during
 // instruction selection into real machine instructions and, where needed,
 // new control flow. Returns the block in which subsequent instructions
 // should be inserted.
 11803 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
 11804 DebugLoc dl = MI.getDebugLoc();
 11805 bool isThumb2 = Subtarget->isThumb2();
 11806 switch (MI.getOpcode()) {
 11807 default: {
 11808 MI.print(errs());
 11809 llvm_unreachable("Unexpected instr type to insert");
 11810 }
 11811
 11812 // Thumb1 post-indexed loads are really just single-register LDMs.
 11813 case ARM::tLDR_postidx: {
 11814 MachineOperand Def(MI.getOperand(1));
 11815 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
 11816 .add(Def) // Rn_wb
 11817 .add(MI.getOperand(2)) // Rn
 11818 .add(MI.getOperand(3)) // PredImm
 11819 .add(MI.getOperand(4)) // PredReg
 11820 .add(MI.getOperand(0)) // Rt
 11821 .cloneMemRefs(MI);
 11822 MI.eraseFromParent();
 11823 return BB;
 11824 }
 11825
 11826 case ARM::MVE_MEMCPYLOOPINST:
 11827 case ARM::MVE_MEMSETLOOPINST: {
 11828
 11829 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
 11830 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
 11831 // the iteration count =ceil(size_in_bytes/16)) in the TP entry block and
 11832 // adds the relevant instructions in the TP loop Body for generation of a
 11833 // WLSTP loop.
 11834
 11835 // Below is relevant portion of the CFG after the transformation.
 11836 // The Machine Basic Blocks are shown along with branch conditions (in
 11837 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
 11838 // portion of the CFG and may not necessarily be the entry/exit of the
 11839 // function.
 11840
 11841 // (Relevant) CFG after transformation:
 11842 // TP entry MBB
 11843 // |
 11844 // |-----------------|
 11845 // (n <= 0) (n > 0)
 11846 // | |
 11847 // | TP loop Body MBB<--|
 11848 // | | |
 11849 // \ |___________|
 11850 // \ /
 11851 // TP exit MBB
 11852
 11853 MachineFunction *MF = BB->getParent();
 11854 MachineFunctionProperties &Properties = MF->getProperties();
 11856
 11857 Register OpDestReg = MI.getOperand(0).getReg();
 11858 Register OpSrcReg = MI.getOperand(1).getReg();
 11859 Register OpSizeReg = MI.getOperand(2).getReg();
 11860
 11861 // Allocate the required MBBs and add to parent function.
 11862 MachineBasicBlock *TpEntry = BB;
 11863 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
 11864 MachineBasicBlock *TpExit;
 11865
 11866 MF->push_back(TpLoopBody);
 11867
 11868 // If any instructions are present in the current block after
 11869 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
 11870 // move the instructions into the newly created exit block. If there are no
 11871 // instructions add an explicit branch to the FallThrough block and then
 11872 // split.
 11873 //
 11874 // The split is required for two reasons:
 11875 // 1) A terminator(t2WhileLoopStart) will be placed at that site.
 11876 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
 11877 // need to be updated. splitAt() already handles this.
 11878 TpExit = BB->splitAt(MI, false);
 11879 if (TpExit == BB) {
 11880 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
 11881 "block containing memcpy/memset Pseudo");
 11882 TpExit = BB->getFallThrough();
 11883 BuildMI(BB, dl, TII->get(ARM::t2B))
 11884 .addMBB(TpExit)
 11886 TpExit = BB->splitAt(MI, false);
 11887 }
 11888
 11889 // Add logic for iteration count
 11890 Register TotalIterationsReg =
 11891 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
 11892
 11893 // Add the vectorized (and predicated) loads/store instructions
 11894 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
 11895 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
 11896 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
 11897
 11898 // Required to avoid conflict with the MachineVerifier during testing.
 11899 Properties.resetNoPHIs();
 11900
 11901 // Connect the blocks
 11902 TpEntry->addSuccessor(TpLoopBody);
 11903 TpLoopBody->addSuccessor(TpLoopBody);
 11904 TpLoopBody->addSuccessor(TpExit);
 11905
 11906 // Reorder for a more natural layout
 11907 TpLoopBody->moveAfter(TpEntry);
 11908 TpExit->moveAfter(TpLoopBody);
 11909
 11910 // Finally, remove the memcpy Pseudo Instruction
 11911 MI.eraseFromParent();
 11912
 11913 // Return the exit block as it may contain other instructions requiring a
 11914 // custom inserter
 11915 return TpExit;
 11916 }
 11917
 11918 // The Thumb2 pre-indexed stores have the same MI operands, they just
 11919 // define them differently in the .td files from the isel patterns, so
 11920 // they need pseudos.
 11921 case ARM::t2STR_preidx:
 11922 MI.setDesc(TII->get(ARM::t2STR_PRE));
 11923 return BB;
 11924 case ARM::t2STRB_preidx:
 11925 MI.setDesc(TII->get(ARM::t2STRB_PRE));
 11926 return BB;
 11927 case ARM::t2STRH_preidx:
 11928 MI.setDesc(TII->get(ARM::t2STRH_PRE));
 11929 return BB;
 11930
 11931 case ARM::STRi_preidx:
 11932 case ARM::STRBi_preidx: {
 11933 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
 11934 : ARM::STRB_PRE_IMM;
 11935 // Decode the offset.
 11936 unsigned Offset = MI.getOperand(4).getImm();
 11937 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
 11939 if (isSub)
 11940 Offset = -Offset;
 11941
 11942 MachineMemOperand *MMO = *MI.memoperands_begin();
 11943 BuildMI(*BB, MI, dl, TII->get(NewOpc))
 11944 .add(MI.getOperand(0)) // Rn_wb
 11945 .add(MI.getOperand(1)) // Rt
 11946 .add(MI.getOperand(2)) // Rn
 11947 .addImm(Offset) // offset (skip GPR==zero_reg)
 11948 .add(MI.getOperand(5)) // pred
 11949 .add(MI.getOperand(6))
 11950 .addMemOperand(MMO);
 11951 MI.eraseFromParent();
 11952 return BB;
 11953 }
 11954 case ARM::STRr_preidx:
 11955 case ARM::STRBr_preidx:
 11956 case ARM::STRH_preidx: {
 11957 unsigned NewOpc;
 11958 switch (MI.getOpcode()) {
 11959 default: llvm_unreachable("unexpected opcode!");
 11960 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
 11961 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
 11962 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
 11963 }
 // Register-offset forms have identical operand layout; copy all operands.
 11964 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
 11965 for (const MachineOperand &MO : MI.operands())
 11966 MIB.add(MO);
 11967 MI.eraseFromParent();
 11968 return BB;
 11969 }
 11970
 11971 case ARM::tMOVCCr_pseudo: {
 11972 // To "insert" a SELECT_CC instruction, we actually have to insert the
 11973 // diamond control-flow pattern. The incoming instruction knows the
 11974 // destination vreg to set, the condition code register to branch on, the
 11975 // true/false values to select between, and a branch opcode to use.
 11976 const BasicBlock *LLVM_BB = BB->getBasicBlock();
 11978
 11979 // thisMBB:
 11980 // ...
 11981 // TrueVal = ...
 11982 // cmpTY ccX, r1, r2
 11983 // bCC copy1MBB
 11984 // fallthrough --> copy0MBB
 11985 MachineBasicBlock *thisMBB = BB;
 11986 MachineFunction *F = BB->getParent();
 11987 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
 11988 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
 11989 F->insert(It, copy0MBB);
 11990 F->insert(It, sinkMBB);
 11991
 11992 // Set the call frame size on entry to the new basic blocks.
 11993 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
 11994 copy0MBB->setCallFrameSize(CallFrameSize);
 11995 sinkMBB->setCallFrameSize(CallFrameSize);
 11996
 11997 // Check whether CPSR is live past the tMOVCCr_pseudo.
 11998 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
 11999 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
 12000 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
 12001 copy0MBB->addLiveIn(ARM::CPSR);
 12002 sinkMBB->addLiveIn(ARM::CPSR);
 12003 }
 12004
 12005 // Transfer the remainder of BB and its successor edges to sinkMBB.
 12006 sinkMBB->splice(sinkMBB->begin(), BB,
 12007 std::next(MachineBasicBlock::iterator(MI)), BB->end());
 12009
 12010 BB->addSuccessor(copy0MBB);
 12011 BB->addSuccessor(sinkMBB);
 12012
 12013 BuildMI(BB, dl, TII->get(ARM::tBcc))
 12014 .addMBB(sinkMBB)
 12015 .addImm(MI.getOperand(3).getImm())
 12016 .addReg(MI.getOperand(4).getReg());
 12017
 12018 // copy0MBB:
 12019 // %FalseValue = ...
 12020 // # fallthrough to sinkMBB
 12021 BB = copy0MBB;
 12022
 12023 // Update machine-CFG edges
 12024 BB->addSuccessor(sinkMBB);
 12025
 12026 // sinkMBB:
 12027 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
 12028 // ...
 12029 BB = sinkMBB;
 12030 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
 12031 .addReg(MI.getOperand(1).getReg())
 12032 .addMBB(copy0MBB)
 12033 .addReg(MI.getOperand(2).getReg())
 12034 .addMBB(thisMBB)
 12035
 12036 MI.eraseFromParent(); // The pseudo instruction is gone now.
 12037 return BB;
 12038 }
 12039
 12040 case ARM::BCCi64:
 12041 case ARM::BCCZi64: {
 12042 // If there is an unconditional branch to the other successor, remove it.
 12043 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
 12044
 12045 // Compare both parts that make up the double comparison separately for
 12046 // equality.
 12047 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
 12048
 12049 Register LHS1 = MI.getOperand(1).getReg();
 12050 Register LHS2 = MI.getOperand(2).getReg();
 12051 if (RHSisZero) {
 12052 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
 12053 .addReg(LHS1)
 12054 .addImm(0)
 12056 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
 12057 .addReg(LHS2).addImm(0)
 12058 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
 12059 } else {
 12060 Register RHS1 = MI.getOperand(3).getReg();
 12061 Register RHS2 = MI.getOperand(4).getReg();
 12062 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
 12063 .addReg(LHS1)
 12064 .addReg(RHS1)
 12066 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
 12067 .addReg(LHS2).addReg(RHS2)
 12068 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
 12069 }
 12070
 12071 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
 12072 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
 // For ARMCC::NE, swap the targets so the EQ-conditional branch below
 // still implements the requested condition.
 12073 if (MI.getOperand(0).getImm() == ARMCC::NE)
 12074 std::swap(destMBB, exitMBB);
 12075
 12076 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
 12077 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
 12078 if (isThumb2)
 12079 BuildMI(BB, dl, TII->get(ARM::t2B))
 12080 .addMBB(exitMBB)
 12082 else
 12083 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
 12084
 12085 MI.eraseFromParent(); // The pseudo instruction is gone now.
 12086 return BB;
 12087 }
 12088
 // setjmp intrinsics need no extra expansion here; patterns handle them.
 12089 case ARM::Int_eh_sjlj_setjmp:
 12090 case ARM::Int_eh_sjlj_setjmp_nofp:
 12091 case ARM::tInt_eh_sjlj_setjmp:
 12092 case ARM::t2Int_eh_sjlj_setjmp:
 12093 case ARM::t2Int_eh_sjlj_setjmp_nofp:
 12094 return BB;
 12095
 12096 case ARM::Int_eh_sjlj_setup_dispatch:
 12097 EmitSjLjDispatchBlock(MI, BB);
 12098 return BB;
 12099 case ARM::COPY_STRUCT_BYVAL_I32:
 12100 ++NumLoopByVals;
 12101 return EmitStructByval(MI, BB);
 12102 case ARM::WIN__CHKSTK:
 12103 return EmitLowered__chkstk(MI, BB);
 12104 case ARM::WIN__DBZCHK:
 12105 return EmitLowered__dbzchk(MI, BB);
 12106 }
 12107}
12108
 12109/// Attaches vregs to MEMCPY that it will use as scratch registers
 12110/// when it is expanded into LDM/STM. This is done as a post-isel lowering
 12111/// instead of as a custom inserter because we need the use list from the SDNode.
 12112static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
 12113 MachineInstr &MI, const SDNode *Node) {
 12114 bool isThumb1 = Subtarget->isThumb1Only();
 12115
 12116 MachineFunction *MF = MI.getParent()->getParent();
 12118 MachineInstrBuilder MIB(*MF, MI);
 12119
 12120 // If the new dst/src is unused mark it as dead.
 12121 if (!Node->hasAnyUseOfValue(0)) {
 12122 MI.getOperand(0).setIsDead(true);
 12123 }
 12124 if (!Node->hasAnyUseOfValue(1)) {
 12125 MI.getOperand(1).setIsDead(true);
 12126 }
 12127
 12128 // The MEMCPY both defines and kills the scratch registers.
 // Operand 4 holds the number of scratch registers to attach; Thumb1 is
 // restricted to the low-register class.
 12129 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
 12130 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
 12131 : &ARM::GPRRegClass);
 12133 }
 12134}
12135
 12137 SDNode *Node) const {
 // Post-isel hook: attach scratch registers to MEMCPY, and normalize the
 // optional cc_out operand of flag-setting arithmetic (see comment below).
 12138 if (MI.getOpcode() == ARM::MEMCPY) {
 12139 attachMEMCPYScratchRegs(Subtarget, MI, Node);
 12140 return;
 12141 }
 12142
 12143 const MCInstrDesc *MCID = &MI.getDesc();
 12144 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
 12145 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
 12146 // operand is still set to noreg. If needed, set the optional operand's
 12147 // register to CPSR, and remove the redundant implicit def.
 12148 //
 12149 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
 12150
 12151 // Rename pseudo opcodes.
 12152 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
 12153 unsigned ccOutIdx;
 12154 if (NewOpc) {
 12155 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
 12156 MCID = &TII->get(NewOpc);
 12157
 12158 assert(MCID->getNumOperands() ==
 12159 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
 12160 && "converted opcode should be the same except for cc_out"
 12161 " (and, on Thumb1, pred)");
 12162
 12163 MI.setDesc(*MCID);
 12164
 12165 // Add the optional cc_out operand
 12166 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
 12167
 12168 // On Thumb1, move all input operands to the end, then add the predicate
 12169 if (Subtarget->isThumb1Only()) {
 12170 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
 12171 MI.addOperand(MI.getOperand(1));
 12172 MI.removeOperand(1);
 12173 }
 12174
 12175 // Restore the ties
 12176 for (unsigned i = MI.getNumOperands(); i--;) {
 12177 const MachineOperand& op = MI.getOperand(i);
 12178 if (op.isReg() && op.isUse()) {
 12179 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
 12180 if (DefIdx != -1)
 12181 MI.tieOperands(DefIdx, i);
 12182 }
 12183 }
 12184
 12186 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
 12187 ccOutIdx = 1;
 12188 } else
 12189 ccOutIdx = MCID->getNumOperands() - 1;
 12190 } else
 12191 ccOutIdx = MCID->getNumOperands() - 1;
 12192
 12193 // Any ARM instruction that sets the 's' bit should specify an optional
 12194 // "cc_out" operand in the last operand position.
 12195 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
 12196 assert(!NewOpc && "Optional cc_out operand required");
 12197 return;
 12198 }
 12199 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
 12200 // since we already have an optional CPSR def.
 12201 bool definesCPSR = false;
 12202 bool deadCPSR = false;
 12203 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
 12204 ++i) {
 12205 const MachineOperand &MO = MI.getOperand(i);
 12206 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
 12207 definesCPSR = true;
 12208 if (MO.isDead())
 12209 deadCPSR = true;
 12210 MI.removeOperand(i);
 12211 break;
 12212 }
 12213 }
 12214 if (!definesCPSR) {
 12215 assert(!NewOpc && "Optional cc_out operand required");
 12216 return;
 12217 }
 12218 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
 12219 if (deadCPSR) {
 12220 assert(!MI.getOperand(ccOutIdx).getReg() &&
 12221 "expect uninitialized optional cc_out operand");
 12222 // Thumb1 instructions must have the S bit even if the CPSR is dead.
 12223 if (!Subtarget->isThumb1Only())
 12224 return;
 12225 }
 12226
 12227 // If this instruction was defined with an optional CPSR def and its dag node
 12228 // had a live implicit CPSR def, then activate the optional CPSR def.
 12229 MachineOperand &MO = MI.getOperand(ccOutIdx);
 12230 MO.setReg(ARM::CPSR);
 12231 MO.setIsDef(true);
 12232}
12233
12234//===----------------------------------------------------------------------===//
12235// ARM Optimization Hooks
12236//===----------------------------------------------------------------------===//
12237
 12238// Helper function that checks if N is a null or all ones constant.
 12239// AllOnes selects which constant to test for: true => all-ones, false => zero.
 12239static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
 12241}
12242
 12243// Return true if N is conditionally 0 or all ones.
 12244// Detects these expressions where cc is an i1 value:
 12245//
 12246// (select cc 0, y) [AllOnes=0]
 12247// (select cc y, 0) [AllOnes=0]
 12248// (zext cc) [AllOnes=0]
 12249// (sext cc) [AllOnes=0/1]
 12250// (select cc -1, y) [AllOnes=1]
 12251// (select cc y, -1) [AllOnes=1]
 12252//
 12253// Invert is set when N is the null/all ones constant when CC is false.
 12254// OtherOp is set to the alternative value of N.
 12256 SDValue &CC, bool &Invert,
 12257 SDValue &OtherOp,
 12258 SelectionDAG &DAG) {
 12259 switch (N->getOpcode()) {
 12260 default: return false;
 12261 case ISD::SELECT: {
 // Either select operand may be the identity constant; Invert records
 // which one, so callers know whether the constant is taken when CC is
 // true or false.
 12262 CC = N->getOperand(0);
 12263 SDValue N1 = N->getOperand(1);
 12264 SDValue N2 = N->getOperand(2);
 12265 if (isZeroOrAllOnes(N1, AllOnes)) {
 12266 Invert = false;
 12267 OtherOp = N2;
 12268 return true;
 12269 }
 12270 if (isZeroOrAllOnes(N2, AllOnes)) {
 12271 Invert = true;
 12272 OtherOp = N1;
 12273 return true;
 12274 }
 12275 return false;
 12276 }
 12277 case ISD::ZERO_EXTEND:
 12278 // (zext cc) can never be the all ones value.
 12279 if (AllOnes)
 12280 return false;
 12281 [[fallthrough]];
 12282 case ISD::SIGN_EXTEND: {
 12283 SDLoc dl(N);
 12284 EVT VT = N->getValueType(0);
 12285 CC = N->getOperand(0);
 // Only i1 setcc conditions are handled; anything else is rejected.
 12286 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
 12287 return false;
 12288 Invert = !AllOnes;
 12289 if (AllOnes)
 12290 // When looking for an AllOnes constant, N is an sext, and the 'other'
 12291 // value is 0.
 12292 OtherOp = DAG.getConstant(0, dl, VT);
 12293 else if (N->getOpcode() == ISD::ZERO_EXTEND)
 12294 // When looking for a 0 constant, N can be zext or sext.
 12295 OtherOp = DAG.getConstant(1, dl, VT);
 12296 else
 12297 OtherOp = DAG.getAllOnesConstant(dl, VT);
 12298 return true;
 12299 }
 12300 }
 12301}
12302
12303// Combine a constant select operand into its use:
12304//
12305// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12306// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12307// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12308// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12309// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12310//
12311// The transform is rejected if the select doesn't have a constant operand that
12312// is null, or all ones when AllOnes is set.
12313//
12314// Also recognize sext/zext from i1:
12315//
12316// (add (zext cc), x) -> (select cc (add x, 1), x)
12317// (add (sext cc), x) -> (select cc (add x, -1), x)
12318//
12319// These transformations eventually create predicated instructions.
12320//
12321// @param N The node to transform.
12322// @param Slct The N operand that is a select.
12323// @param OtherOp The other N operand (x above).
12324// @param DCI Context.
12325// @param AllOnes Require the select constant to be all ones instead of null.
12326// @returns The new node, or SDValue() on failure.
12327static
12330                            bool AllOnes = false) {
12331  SelectionDAG &DAG = DCI.DAG;
12332  EVT VT = N->getValueType(0);
12333  SDValue NonConstantVal;
12334  SDValue CCOp;
12335  bool SwapSelectOps;
  // Bail out unless Slct is a select (or an i1 zext/sext) whose constant arm
  // is the identity value for N's operation (0, or all-ones when requested).
12336  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12337                                  NonConstantVal, DAG))
12338    return SDValue();
12339
12340  // Slct is now known to be the desired identity constant when CC is true.
12341  SDValue TrueVal = OtherOp;
  // Fold N's operation into the non-constant arm only; the identity arm
  // collapses to OtherOp itself.
12342  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12343                                 OtherOp, NonConstantVal);
12344  // Unless SwapSelectOps says CC should be false.
12345  if (SwapSelectOps)
12346    std::swap(TrueVal, FalseVal);
12347
12348  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12349                     CCOp, TrueVal, FalseVal);
12350}
12351
12352// Attempt combineSelectAndUse on each operand of a commutative operator N.
12353static
12356  SDValue N0 = N->getOperand(0);
12357  SDValue N1 = N->getOperand(1);
  // Only fold a select operand that has a single use, so the select node can
  // actually be removed after the combine.
12358  if (N0.getNode()->hasOneUse())
12359    if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12360      return Result;
  // Try the other operand order, since N is commutative.
12361  if (N1.getNode()->hasOneUse())
12362    if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12363      return Result;
12364  return SDValue();
12365}
12366
  // Returns true if N performs a VUZP-style deinterleave: either a real
  // ARMISD::VUZP, or the VTRN form that acts as a VUZP on v2i32.
12368  // VUZP shuffle node.
12369  if (N->getOpcode() == ARMISD::VUZP)
12370    return true;
12371
12372  // "VUZP" on i32 is an alias for VTRN.
12373  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12374    return true;
12375
12376  return false;
12377}
12378
12381                                 const ARMSubtarget *Subtarget) {
12382  // Look for ADD(VUZP.0, VUZP.1).
  // Both add operands must be distinct results of one and the same VUZP node.
12383  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12384      N0 == N1)
12385    return SDValue();
12386
12387  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12388  if (!N->getValueType(0).is64BitVector())
12389    return SDValue();
12390
12391  // Generate vpadd.
12392  SelectionDAG &DAG = DCI.DAG;
12393  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12394  SDLoc dl(N);
12395  SDNode *Unzip = N0.getNode();
12396  EVT VT = N->getValueType(0);
12397
  // Emit the NEON intrinsic directly; the first operand is the intrinsic ID.
12399  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12400                                TLI.getPointerTy(DAG.getDataLayout())));
  // vpadd of the VUZP's two inputs computes the same pairwise sums as
  // add(VUZP.0, VUZP.1).
12401  Ops.push_back(Unzip->getOperand(0));
12402  Ops.push_back(Unzip->getOperand(1));
12403
12404  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12405}
12406
12409                                      const ARMSubtarget *Subtarget) {
12410  // Check for two extended operands.
  // Both operands must use the same extension kind (both sext or both zext)
  // so a single vpaddls/vpaddlu covers the whole pattern.
12411  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12412        N1.getOpcode() == ISD::SIGN_EXTEND) &&
12413      !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12414        N1.getOpcode() == ISD::ZERO_EXTEND))
12415    return SDValue();
12416
12417  SDValue N00 = N0.getOperand(0);
12418  SDValue N10 = N1.getOperand(0);
12419
12420  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12421  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12422      N00 == N10)
12423    return SDValue();
12424
12425  // We only recognize Q register paddl here; this can't be reached until
12426  // after type legalization.
12427  if (!N00.getValueType().is64BitVector() ||
12429    return SDValue();
12430
12431  // Generate vpaddl.
12432  SelectionDAG &DAG = DCI.DAG;
12433  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12434  SDLoc dl(N);
12435  EVT VT = N->getValueType(0);
12436
12438  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12439  unsigned Opcode;
12440  if (N0.getOpcode() == ISD::SIGN_EXTEND)
12441    Opcode = Intrinsic::arm_neon_vpaddls;
12442  else
12443    Opcode = Intrinsic::arm_neon_vpaddlu;
12444  Ops.push_back(DAG.getConstant(Opcode, dl,
12445                                TLI.getPointerTy(DAG.getDataLayout())));
12446  EVT ElemTy = N00.getValueType().getVectorElementType();
12447  unsigned NumElts = VT.getVectorNumElements();
  // Re-concatenate the VUZP's two inputs: vpaddl of the interleaved vector
  // yields the same pairwise sums as the deinterleave-then-add pattern.
12448  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12449  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12450                               N00.getOperand(0), N00.getOperand(1));
12451  Ops.push_back(Concat);
12452
12453  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12454}
12455
12456// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12457// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12458// much easier to match.
12459static SDValue
12462                                     const ARMSubtarget *Subtarget) {
12463  // Only perform optimization if after legalize, and if NEON is available. We
12464  // also expected both operands to be BUILD_VECTORs.
12465  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12466      || N0.getOpcode() != ISD::BUILD_VECTOR
12467      || N1.getOpcode() != ISD::BUILD_VECTOR)
12468    return SDValue();
12469
12470  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12471  EVT VT = N->getValueType(0);
12472  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12473    return SDValue();
12474
12475  // Check that the vector operands are of the right form.
12476  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12477  // operands, where N is the size of the formed vector.
12478  // Each EXTRACT_VECTOR should have the same input vector and odd or even
12479  // index such that we have a pair wise add pattern.
12480
12481  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12483    return SDValue();
12484  SDValue Vec = N0->getOperand(0)->getOperand(0);
12485  SDNode *V = Vec.getNode();
12486  unsigned nextIndex = 0;
12487
12488  // For each operands to the ADD which are BUILD_VECTORs,
12489  // check to see if each of their operands are an EXTRACT_VECTOR with
12490  // the same vector and appropriate index.
12491  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12494
12495      SDValue ExtVec0 = N0->getOperand(i);
12496      SDValue ExtVec1 = N1->getOperand(i);
12497
12498      // First operand is the vector, verify it's the same.
12499      if (V != ExtVec0->getOperand(0).getNode() ||
12500          V != ExtVec1->getOperand(0).getNode())
12501        return SDValue();
12502
12503      // Second is the constant, verify it's correct.
12506
12507      // For the constant, we want to see all the even or all the odd.
      // N0 must extract lanes 0,2,4,... and N1 lanes 1,3,5,... so the add
      // forms adjacent-pair sums.
12508      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12509          || C1->getZExtValue() != nextIndex+1)
12510        return SDValue();
12511
12512      // Increment index.
12513      nextIndex+=2;
12514    } else
12515      return SDValue();
12516  }
12517
12518  // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12519  // we're using the entire input vector, otherwise there's a size/legality
12520  // mismatch somewhere.
12521  if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12523    return SDValue();
12524
12525  // Create VPADDL node.
12526  SelectionDAG &DAG = DCI.DAG;
12527  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12528
12529  SDLoc dl(N);
12530
12531  // Build operand list.
12533  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12534                                TLI.getPointerTy(DAG.getDataLayout())));
12535
12536  // Input is the vector.
12537  Ops.push_back(Vec);
12538
12539  // Get widened type and narrowed type.
12540  MVT widenType;
12541  unsigned numElem = VT.getVectorNumElements();
12542
12543  EVT inputLaneType = Vec.getValueType().getVectorElementType();
12544  switch (inputLaneType.getSimpleVT().SimpleTy) {
12545    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12546    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12547    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12548    default:
12549      llvm_unreachable("Invalid vector element type for padd optimization.");
12550  }
12551
  // vpaddl widens each lane; convert back (extend or truncate) to the type
  // the original add produced.
12552  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12553  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12554  return DAG.getNode(ExtOp, dl, VT, tmp);
12555}
12556
  // Returns V itself if it is a [SU]MUL_LOHI node, otherwise a null SDValue.
12558  if (V->getOpcode() == ISD::UMUL_LOHI ||
12559      V->getOpcode() == ISD::SMUL_LOHI)
12560    return V;
12561  return SDValue();
12562}
12563
12564static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12566                                        const ARMSubtarget *Subtarget) {
  // The SMLALxy instructions require the DSP extension.
12567  if (!Subtarget->hasBaseDSP())
12568    return SDValue();
12569
12570  // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12571  // accumulate the product into a 64-bit value. The 16-bit values will
12572  // be sign extended somehow or SRA'd into 32-bit values
12573  // (addc (adde (mul 16bit, 16bit), lo), hi)
  // The mul can be either operand of the ADDC; the other operand is the low
  // half of the accumulator.
12574  SDValue Mul = AddcNode->getOperand(0);
12575  SDValue Lo = AddcNode->getOperand(1);
12576  if (Mul.getOpcode() != ISD::MUL) {
12577    Lo = AddcNode->getOperand(0);
12578    Mul = AddcNode->getOperand(1);
12579    if (Mul.getOpcode() != ISD::MUL)
12580      return SDValue();
12581  }
12582
  // The ADDE must add the product's sign bits (mul >> 31) to the high half of
  // the accumulator, again accepting either operand order.
12583  SDValue SRA = AddeNode->getOperand(0);
12584  SDValue Hi = AddeNode->getOperand(1);
12585  if (SRA.getOpcode() != ISD::SRA) {
12586    SRA = AddeNode->getOperand(1);
12587    Hi = AddeNode->getOperand(0);
12588    if (SRA.getOpcode() != ISD::SRA)
12589      return SDValue();
12590  }
12591  if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12592    if (Const->getZExtValue() != 31)
12593      return SDValue();
12594  } else
12595    return SDValue();
12596
12597  if (SRA.getOperand(0) != Mul)
12598    return SDValue();
12599
12600  SelectionDAG &DAG = DCI.DAG;
12601  SDLoc dl(AddcNode);
12602  unsigned Opcode = 0;
12603  SDValue Op0;
12604  SDValue Op1;
12605
  // Select the B/T variant depending on how each 16-bit multiplicand is
  // produced (isS16 / isSRA16 helpers defined earlier in this file).
12606  if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12607    Opcode = ARMISD::SMLALBB;
12608    Op0 = Mul.getOperand(0);
12609    Op1 = Mul.getOperand(1);
12610  } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12611    Opcode = ARMISD::SMLALBT;
12612    Op0 = Mul.getOperand(0);
12613    Op1 = Mul.getOperand(1).getOperand(0);
12614  } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12615    Opcode = ARMISD::SMLALTB;
12616    Op0 = Mul.getOperand(0).getOperand(0);
12617    Op1 = Mul.getOperand(1);
12618  } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12619    Opcode = ARMISD::SMLALTT;
12620    Op0 = Mul->getOperand(0).getOperand(0);
12621    Op1 = Mul->getOperand(1).getOperand(0);
12622  }
12623
12624  if (!Op0 || !Op1)
12625    return SDValue();
12626
12627  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12628                              Op0, Op1, Lo, Hi);
12629  // Replace the ADDs' nodes uses by the MLA node's values.
12630  SDValue HiMLALResult(SMLAL.getNode(), 1);
12631  SDValue LoMLALResult(SMLAL.getNode(), 0);
12632
12633  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12634  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12635
12636  // Return original node to notify the driver to stop replacing.
12637  SDValue resNode(AddcNode, 0);
12638  return resNode;
12639}
12640
12643                                     const ARMSubtarget *Subtarget) {
12644  // Look for multiply add opportunities.
12645  // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
12646  // each add node consumes a value from ISD::UMUL_LOHI and there is
12647  // a glue link from the first add to the second add.
12648  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12649  // a S/UMLAL instruction.
12650  //                  UMUL_LOHI
12651  //                 / :lo    \ :hi
12652  //                V          \          [no multiline comment]
12653  //  loAdd ->  ADDC            |
12654  //                 \ :carry  /
12655  //                   V      V
12656  //                    ADDE <- hiAdd
12657  //
12658  // In the special case where only the higher part of a signed result is used
12659  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12660  // a constant with the exact value of 0x80000000, we recognize we are dealing
12661  // with a "rounded multiply and add" (or subtract) and transform it into
12662  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
12663
12664  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12665          AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12666         "Expect an ADDE or SUBE");
12667
12668  assert(AddeSubeNode->getNumOperands() == 3 &&
12669         AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12670         "ADDE node has the wrong inputs");
12671
12672  // Check that we are chained to the right ADDC or SUBC node.
12673  SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12674  if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12675       AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12676      (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12677       AddcSubcNode->getOpcode() != ARMISD::SUBC))
12678    return SDValue();
12679
12680  SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12681  SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12682
12683  // Check if the two operands are from the same mul_lohi node.
12684  if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12685    return SDValue();
12686
12687  assert(AddcSubcNode->getNumValues() == 2 &&
12688         AddcSubcNode->getValueType(0) == MVT::i32 &&
12689         "Expect ADDC with two result values. First: i32");
12690
12691  // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12692  // may be a SMLAL which multiplies two 16-bit values.
12693  if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12694      AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12695      AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12696      AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12697      AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12698    return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12699
12700  // Check for the triangle shape.
12701  SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12702  SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12703
12704  // Make sure that the ADDE/SUBE operands are not coming from the same node.
12705  if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12706    return SDValue();
12707
12708  // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12709  bool IsLeftOperandMUL = false;
12710  SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12711  if (MULOp == SDValue())
12712    MULOp = findMUL_LOHI(AddeSubeOp1);
12713  else
12714    IsLeftOperandMUL = true;
12715  if (MULOp == SDValue())
12716    return SDValue();
12717
12718  // Figure out the right opcode.
12719  unsigned Opc = MULOp->getOpcode();
12720  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12721
12722  // Figure out the high and low input values to the MLAL node.
12723  SDValue *HiAddSub = nullptr;
12724  SDValue *LoMul = nullptr;
12725  SDValue *LowAddSub = nullptr;
12726
12727  // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12728  if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12729    return SDValue();
12730
  // The non-MUL operand of the ADDE/SUBE is the high accumulator input.
12731  if (IsLeftOperandMUL)
12732    HiAddSub = &AddeSubeOp1;
12733  else
12734    HiAddSub = &AddeSubeOp0;
12735
12736  // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
12737  // whose low result is fed to the ADDC/SUBC we are checking.
12738
12739  if (AddcSubcOp0 == MULOp.getValue(0)) {
12740    LoMul = &AddcSubcOp0;
12741    LowAddSub = &AddcSubcOp1;
12742  }
12743  if (AddcSubcOp1 == MULOp.getValue(0)) {
12744    LoMul = &AddcSubcOp1;
12745    LowAddSub = &AddcSubcOp0;
12746  }
12747
12748  if (!LoMul)
12749    return SDValue();
12750
12751  // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12752  // the replacement below will create a cycle.
12753  if (AddcSubcNode == HiAddSub->getNode() ||
12754      AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12755    return SDValue();
12756
12757  // Create the merged node.
12758  SelectionDAG &DAG = DCI.DAG;
12759
12760  // Start building operand list.
12762  Ops.push_back(LoMul->getOperand(0));
12763  Ops.push_back(LoMul->getOperand(1));
12764
12765  // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
12766  // the case, we must be doing signed multiplication and only use the higher
12767  // part of the result of the MLAL, furthermore the LowAddSub must be a constant
12768  // addition or subtraction with the value of 0x80000000 (the rounding bias).
12769  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
12770      FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
12771      LowAddSub->getNode()->getOpcode() == ISD::Constant &&
12772      static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
12773          0x80000000) {
12774    Ops.push_back(*HiAddSub);
12775    if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
12776      FinalOpc = ARMISD::SMMLSR;
12777    } else {
12778      FinalOpc = ARMISD::SMMLAR;
12779    }
12780    SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
12781    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
12782
12783    return SDValue(AddeSubeNode, 0);
12784  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
12785    // SMMLS is generated during instruction selection and the rest of this
12786    // function can not handle the case where AddcSubcNode is a SUBC.
12787    return SDValue();
12788
12789  // Finish building the operand list for {U/S}MLAL
12790  Ops.push_back(*LowAddSub);
12791  Ops.push_back(*HiAddSub);
12792
12793  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
12794                                 DAG.getVTList(MVT::i32, MVT::i32), Ops);
12795
12796  // Replace the ADDs' nodes uses by the MLA node's values.
12797  SDValue HiMLALResult(MLALNode.getNode(), 1);
12798  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
12799
12800  SDValue LoMLALResult(MLALNode.getNode(), 0);
12801  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
12802
12803  // Return original node to notify the driver to stop replacing.
12804  return SDValue(AddeSubeNode, 0);
12805}
12806
12809                                      const ARMSubtarget *Subtarget) {
12810  // UMAAL is similar to UMLAL except that it adds two unsigned values.
12811  // While trying to combine for the other MLAL nodes, first search for the
12812  // chance to use UMAAL. Check if Addc uses a node which has already
12813  // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
12814  // as the addend, and it's handled in PerformUMLALCombine.
12815
  // UMAAL requires ARMv6+ with DSP; otherwise fall back to the MLAL combine.
12816  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12817    return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12818
12819  // Check that we have a glued ADDC node.
12820  SDNode* AddcNode = AddeNode->getOperand(2).getNode();
12821  if (AddcNode->getOpcode() != ARMISD::ADDC)
12822    return SDValue();
12823
12824  // Find the converted UMAAL or quit if it doesn't exist.
12825  SDNode *UmlalNode = nullptr;
12826  SDValue AddHi;
12827  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
12828    UmlalNode = AddcNode->getOperand(0).getNode();
12829    AddHi = AddcNode->getOperand(1);
12830  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
12831    UmlalNode = AddcNode->getOperand(1).getNode();
12832    AddHi = AddcNode->getOperand(0);
12833  } else {
12834    return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12835  }
12836
12837  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
12838  // the ADDC as well as Zero.
  // The UMLAL's high accumulator input (operand 3) must be zero for the
  // UMAAL replacement to be equivalent.
12839  if (!isNullConstant(UmlalNode->getOperand(3)))
12840    return SDValue();
12841
12842  if ((isNullConstant(AddeNode->getOperand(0)) &&
12843       AddeNode->getOperand(1).getNode() == UmlalNode) ||
12844      (AddeNode->getOperand(0).getNode() == UmlalNode &&
12845       isNullConstant(AddeNode->getOperand(1)))) {
12846    SelectionDAG &DAG = DCI.DAG;
12847    SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
12848                      UmlalNode->getOperand(2), AddHi };
12849    SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
12850                                DAG.getVTList(MVT::i32, MVT::i32), Ops);
12851
12852    // Replace the ADDs' nodes uses by the UMAAL node's values.
12853    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
12854    DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
12855
12856    // Return original node to notify the driver to stop replacing.
12857    return SDValue(AddeNode, 0);
12858  }
12859  return SDValue();
12860}
12861
12863 const ARMSubtarget *Subtarget) {
12864 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12865 return SDValue();
12866
12867 // Check that we have a pair of ADDC and ADDE as operands.
12868 // Both addends of the ADDE must be zero.
12869 SDNode* AddcNode = N->getOperand(2).getNode();
12870 SDNode* AddeNode = N->getOperand(3).getNode();
12871 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
12872 (AddeNode->getOpcode() == ARMISD::ADDE) &&
12873 isNullConstant(AddeNode->getOperand(0)) &&
12874 isNullConstant(AddeNode->getOperand(1)) &&
12875 (AddeNode->getOperand(2).getNode() == AddcNode))
12876 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
12877 DAG.getVTList(MVT::i32, MVT::i32),
12878 {N->getOperand(0), N->getOperand(1),
12879 AddcNode->getOperand(0), AddcNode->getOperand(1)});
12880 else
12881 return SDValue();
12882}
12883
12886                                       const ARMSubtarget *Subtarget) {
12887  SelectionDAG &DAG(DCI.DAG);
12888
12889  if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
12890    // (SUBC (ADDE 0, 0, C), 1) -> C
12891    SDValue LHS = N->getOperand(0);
12892    SDValue RHS = N->getOperand(1);
12893    if (LHS->getOpcode() == ARMISD::ADDE &&
12894        isNullConstant(LHS->getOperand(0)) &&
12895        isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
      // The carry produced by the ADDE is the value being materialized;
      // forward it directly and keep the SUBC's first result unchanged.
12896      return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
12897    }
12898  }
12899
  // On Thumb1, rewrite ADDC/SUBC of a negative immediate as the inverse
  // operation on the negated immediate.
12900  if (Subtarget->isThumb1Only()) {
12901    SDValue RHS = N->getOperand(1);
12903      int32_t imm = C->getSExtValue();
      // Exclude INT_MIN, whose negation is not representable.
12904      if (imm < 0 && imm > std::numeric_limits<int>::min()) {
12905        SDLoc DL(N);
12906        RHS = DAG.getConstant(-imm, DL, MVT::i32);
12907        unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
12908                                                           : ARMISD::ADDC;
12909        return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
12910      }
12911    }
12912  }
12913
12914  return SDValue();
12915}
12916
12919                                       const ARMSubtarget *Subtarget) {
12920  if (Subtarget->isThumb1Only()) {
12921    SelectionDAG &DAG = DCI.DAG;
12922    SDValue RHS = N->getOperand(1);
12924      int64_t imm = C->getSExtValue();
12925      if (imm < 0) {
12926        SDLoc DL(N);
12927
12928        // The with-carry-in form matches bitwise not instead of the negation.
12929        // Effectively, the inverse interpretation of the carry flag already
12930        // accounts for part of the negation.
12931        RHS = DAG.getConstant(~imm, DL, MVT::i32);
12932
12933        unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
12934                                                           : ARMISD::ADDE;
12935        return DAG.getNode(Opcode, DL, N->getVTList(),
12936                           N->getOperand(0), RHS, N->getOperand(2));
12937      }
12938    }
12939  } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
    // On non-Thumb1 targets, try to merge the ADDE/SUBE with a preceding
    // SMUL_LOHI into a 64-bit MLAL.
12940    return AddCombineTo64bitMLAL(N, DCI, Subtarget);
12941  }
12942  return SDValue();
12943}
12944
12947                                      const ARMSubtarget *Subtarget) {
  // Match a select/select_cc of a scalar against a vector min/max reduction
  // and turn it into an MVE accumulating reduction (VMINV/VMAXV) node.
12948  if (!Subtarget->hasMVEIntegerOps())
12949    return SDValue();
12950
12951  SDLoc dl(N);
12952  SDValue SetCC;
12953  SDValue LHS;
12954  SDValue RHS;
12955  ISD::CondCode CC;
12956  SDValue TrueVal;
12957  SDValue FalseVal;
12958
  // Extract the compare operands, condition, and selected values from either
  // a SELECT(SETCC) or a SELECT_CC node.
12959  if (N->getOpcode() == ISD::SELECT &&
12960      N->getOperand(0)->getOpcode() == ISD::SETCC) {
12961    SetCC = N->getOperand(0);
12962    LHS = SetCC->getOperand(0);
12963    RHS = SetCC->getOperand(1);
12964    CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
12965    TrueVal = N->getOperand(1);
12966    FalseVal = N->getOperand(2);
12967  } else if (N->getOpcode() == ISD::SELECT_CC) {
12968    LHS = N->getOperand(0);
12969    RHS = N->getOperand(1);
12970    CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
12971    TrueVal = N->getOperand(2);
12972    FalseVal = N->getOperand(3);
12973  } else {
12974    return SDValue();
12975  }
12976
  // Determine the reduction kind; the condition code must agree with it
  // (e.g. ult/ugt only with an unsigned min).
12977  unsigned int Opcode = 0;
12978  if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
12979       FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
12980      (CC == ISD::SETULT || CC == ISD::SETUGT)) {
12981    Opcode = ARMISD::VMINVu;
12982    if (CC == ISD::SETUGT)
12983      std::swap(TrueVal, FalseVal);
12984  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
12985              FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
12986             (CC == ISD::SETLT || CC == ISD::SETGT)) {
12987    Opcode = ARMISD::VMINVs;
12988    if (CC == ISD::SETGT)
12989      std::swap(TrueVal, FalseVal);
12990  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
12991              FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
12992             (CC == ISD::SETUGT || CC == ISD::SETULT)) {
12993    Opcode = ARMISD::VMAXVu;
12994    if (CC == ISD::SETULT)
12995      std::swap(TrueVal, FalseVal);
12996  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
12997              FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
12998             (CC == ISD::SETGT || CC == ISD::SETLT)) {
12999    Opcode = ARMISD::VMAXVs;
13000    if (CC == ISD::SETLT)
13001      std::swap(TrueVal, FalseVal);
13002  } else
13003    return SDValue();
13004
13005  // Normalise to the right hand side being the vector reduction
13006  switch (TrueVal->getOpcode()) {
13007  case ISD::VECREDUCE_UMIN:
13008  case ISD::VECREDUCE_SMIN:
13009  case ISD::VECREDUCE_UMAX:
13010  case ISD::VECREDUCE_SMAX:
13011    std::swap(LHS, RHS);
13012    std::swap(TrueVal, FalseVal);
13013    break;
13014  }
13015
13016  EVT VectorType = FalseVal->getOperand(0).getValueType();
13017
  // Only 128-bit MVE integer vector types are handled.
13018  if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13019      VectorType != MVT::v4i32)
13020    return SDValue();
13021
13022  EVT VectorScalarType = VectorType.getVectorElementType();
13023
13024  // The values being selected must also be the ones being compared
13025  if (TrueVal != LHS || FalseVal != RHS)
13026    return SDValue();
13027
13028  EVT LeftType = LHS->getValueType(0);
13029  EVT RightType = RHS->getValueType(0);
13030
13031  // The types must match the reduced type too
13032  if (LeftType != VectorScalarType || RightType != VectorScalarType)
13033    return SDValue();
13034
13035  // Legalise the scalar to an i32
13036  if (VectorScalarType != MVT::i32)
13037    LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13038
13039  // Generate the reduction as an i32 for legalisation purposes
13040  auto Reduction =
13041      DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13042
13043  // The result isn't actually an i32 so truncate it back to its original type
13044  if (VectorScalarType != MVT::i32)
13045    Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13046
13047  return Reduction;
13048}
13049
13050// A special combine for the vqdmulh family of instructions. This is one of the
13051// potential set of patterns that could match this instruction. The base pattern
13052// you would expect to be min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13053// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13054// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
13055// the max is unnecessary.
13057  EVT VT = N->getValueType(0);
13058  SDValue Shft;
13059  ConstantSDNode *Clamp;
13060
13061  if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13062    return SDValue();
13063
  // Find the saturating clamp and the shifted value: either an explicit SMIN,
  // or the vselect/setcc form that an i64 smin legalizes into.
13064  if (N->getOpcode() == ISD::SMIN) {
13065    Shft = N->getOperand(0);
13066    Clamp = isConstOrConstSplat(N->getOperand(1));
13067  } else if (N->getOpcode() == ISD::VSELECT) {
13068    // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13069    SDValue Cmp = N->getOperand(0);
13070    if (Cmp.getOpcode() != ISD::SETCC ||
13071        cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13072        Cmp.getOperand(0) != N->getOperand(1) ||
13073        Cmp.getOperand(1) != N->getOperand(2))
13074      return SDValue();
13075    Shft = N->getOperand(1);
13076    Clamp = isConstOrConstSplat(N->getOperand(2));
13077  } else
13078    return SDValue();
13079
13080  if (!Clamp)
13081    return SDValue();
13082
  // The clamp constant (signed-max of an N-bit type) determines the element
  // type and the expected shift amount (N - 1).
13083  MVT ScalarType;
13084  int ShftAmt = 0;
13085  switch (Clamp->getSExtValue()) {
13086  case (1 << 7) - 1:
13087    ScalarType = MVT::i8;
13088    ShftAmt = 7;
13089    break;
13090  case (1 << 15) - 1:
13091    ScalarType = MVT::i16;
13092    ShftAmt = 15;
13093    break;
13094  case (1ULL << 31) - 1:
13095    ScalarType = MVT::i32;
13096    ShftAmt = 31;
13097    break;
13098  default:
13099    return SDValue();
13100  }
13101
13102  if (Shft.getOpcode() != ISD::SRA)
13103    return SDValue();
13105  if (!N1 || N1->getSExtValue() != ShftAmt)
13106    return SDValue();
13107
  // Below the shift we expect mul(sext(x), sext(y)) on matching vector types.
13108  SDValue Mul = Shft.getOperand(0);
13109  if (Mul.getOpcode() != ISD::MUL)
13110    return SDValue();
13111
13112  SDValue Ext0 = Mul.getOperand(0);
13113  SDValue Ext1 = Mul.getOperand(1);
13114  if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13115      Ext1.getOpcode() != ISD::SIGN_EXTEND)
13116    return SDValue();
13117  EVT VecVT = Ext0.getOperand(0).getValueType();
13118  if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13119    return SDValue();
13120  if (Ext1.getOperand(0).getValueType() != VecVT ||
13121      VecVT.getScalarType() != ScalarType ||
13122      VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13123    return SDValue();
13124
13125  SDLoc DL(Mul);
13126  unsigned LegalLanes = 128 / (ShftAmt + 1);
13127  EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13128  // For types smaller than legal vectors extend to be legal and only use needed
13129  // lanes.
13130  if (VecVT.getSizeInBits() < 128) {
13131    EVT ExtVecVT =
13133                         VecVT.getVectorNumElements());
13134    SDValue Inp0 =
13135        DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13136    SDValue Inp1 =
13137        DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13138    Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13139    Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13140    SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13141    SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13142    Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13143    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13144  }
13145
13146  // For larger types, split into legal sized chunks.
13147  assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13148  unsigned NumParts = VecVT.getSizeInBits() / 128;
13150  for (unsigned I = 0; I < NumParts; ++I) {
13151    SDValue Inp0 =
13152        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13153                    DAG.getVectorIdxConstant(I * LegalLanes, DL));
13154    SDValue Inp1 =
13155        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13156                    DAG.getVectorIdxConstant(I * LegalLanes, DL));
13157    SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13158    Parts.push_back(VQDMULH);
13159  }
13160  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13161                     DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13162}
13163
13166                                       const ARMSubtarget *Subtarget) {
13167  if (!Subtarget->hasMVEIntegerOps())
13168    return SDValue();
13169
  // First try to match the vqdmulh pattern (see PerformVQDMULHCombine above).
13170  if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13171    return V;
13172
13173  // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13174  //
13175  // We need to re-implement this optimization here as the implementation in the
13176  // Target-Independent DAGCombiner does not handle the kind of constant we make
13177  // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13178  // good reason, allowing truncation there would break other targets).
13179  //
13180  // Currently, this is only done for MVE, as it's the only target that benefits
13181  // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13182  if (N->getOperand(0).getOpcode() != ISD::XOR)
13183    return SDValue();
13184  SDValue XOR = N->getOperand(0);
13185
13186  // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13187  // It is important to check with truncation allowed as the BUILD_VECTORs we
13188  // generate in those situations will truncate their operands.
13189  ConstantSDNode *Const =
13190      isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13191                          /*AllowTruncation*/ true);
13192  if (!Const || !Const->isOne())
13193    return SDValue();
13194
13195  // Rewrite into vselect(cond, rhs, lhs).
13196  SDValue Cond = XOR->getOperand(0);
13197  SDValue LHS = N->getOperand(1);
13198  SDValue RHS = N->getOperand(2);
13199  EVT Type = N->getValueType(0);
13200  return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13201}
13202
13203 // Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13206                                           const ARMSubtarget *Subtarget) {
13207   SDValue Op0 = N->getOperand(0);
13208   SDValue Op1 = N->getOperand(1);
13209   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13210   EVT VT = N->getValueType(0);
13211 
13212   if (!Subtarget->hasMVEIntegerOps() ||
13214     return SDValue();
13215 
  // Canonicalize splat(n) >= [0,1,2,...] into [0,1,2,...] < splat(n) so only
  // the ULT form needs to be matched below.
13216   if (CC == ISD::SETUGE) {
13217     std::swap(Op0, Op1);
13218     CC = ISD::SETULT;
13219   }
13220 
  // Only i1 predicate vectors can become a VCTP.
13221   if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13223     return SDValue();
13224 
13225   // Check first operand is BuildVector of 0,1,2,...
13226   for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13227     if (!Op0.getOperand(I).isUndef() &&
13229           Op0.getConstantOperandVal(I) == I))
13230       return SDValue();
13231   }
13232 
13233   // The second is a Splat of Op1S
13234   SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13235   if (!Op1S)
13236     return SDValue();
13237 
  // Map the predicate vector width onto the matching VCTP lane size.
13238   unsigned Opc;
13239   switch (VT.getVectorNumElements()) {
13240   case 2:
13241     Opc = Intrinsic::arm_mve_vctp64;
13242     break;
13243   case 4:
13244     Opc = Intrinsic::arm_mve_vctp32;
13245     break;
13246   case 8:
13247     Opc = Intrinsic::arm_mve_vctp16;
13248     break;
13249   case 16:
13250     Opc = Intrinsic::arm_mve_vctp8;
13251     break;
13252   default:
13253     return SDValue();
13254   }
13255 
  // Emit vctp(n), where n is the splatted scalar element count.
13256   SDLoc DL(N);
13257   return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13258                          DCI.DAG.getConstant(Opc, DL, MVT::i32),
13259                          DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13260 }
13261
13262 /// PerformADDECombine - Target-specific dag combine transform from
13263 /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13264 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13267                                       const ARMSubtarget *Subtarget) {
13268   // Only ARM and Thumb2 support UMLAL/SMLAL.
13269   if (Subtarget->isThumb1Only())
13270     return PerformAddeSubeCombine(N, DCI, Subtarget);
13271 
13272   // Only perform the checks after legalize when the pattern is available.
13273   if (DCI.isBeforeLegalize()) return SDValue();
13274 
13275   return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13276 }
13277
13278 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13279 /// operands N0 and N1. This is a helper for PerformADDCombine that is
13280 /// called with the default operands, and if that fails, with commuted
13281 /// operands.
13284                                           const ARMSubtarget *Subtarget){
13285   // Attempt to create vpadd for this add.
13286   if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13287     return Result;
13288 
13289   // Attempt to create vpaddl for this add.
13290   if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13291     return Result;
13292   if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13293                                                       Subtarget))
13294     return Result;
13295 
13296   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
  // Only valid when the select has a single use, so the original select can
  // be replaced outright.
13297   if (N0.getNode()->hasOneUse())
13298     if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13299       return Result;
13300   return SDValue();
13301 }
13302
13304   EVT VT = N->getValueType(0);
13305   SDValue N0 = N->getOperand(0);
13306   SDValue N1 = N->getOperand(1);
13307   SDLoc dl(N);
13308 
  // Recognize the add-of-vector-reduction nodes this combine reassociates
  // around (both generic VECREDUCE_ADD and the ARM-specific forms).
13309   auto IsVecReduce = [](SDValue Op) {
13310     switch (Op.getOpcode()) {
13311     case ISD::VECREDUCE_ADD:
13312     case ARMISD::VADDVs:
13313     case ARMISD::VADDVu:
13314     case ARMISD::VMLAVs:
13315     case ARMISD::VMLAVu:
13316       return true;
13317     }
13318     return false;
13319   };
13320 
13321   auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13322     // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13323     // add(add(X, vecreduce(Y)), vecreduce(Z))
13324     // to make better use of vaddva style instructions.
13325     if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13326         IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13327         !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13328       SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13329       return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13330     }
13331     // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13332     // add(add(add(A, C), reduce(B)), reduce(D))
13333     if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13334         N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
      // Find which operand of each add is the reduction (either side works).
13335       unsigned N0RedOp = 0;
13336       if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13337         N0RedOp = 1;
13338         if (!IsVecReduce(N0.getOperand(N0RedOp)))
13339           return SDValue();
13340       }
13341 
13342       unsigned N1RedOp = 0;
13343       if (!IsVecReduce(N1.getOperand(N1RedOp)))
13344         N1RedOp = 1;
13345       if (!IsVecReduce(N1.getOperand(N1RedOp)))
13346         return SDValue();
13347 
13348       SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13349                                  N1.getOperand(1 - N1RedOp));
13350       SDValue Add1 =
13351           DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13352       return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13353     }
13354     return SDValue();
13355   };
13356   if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13357     return R;
13358   if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13359     return R;
13360 
13361   // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13362   // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13363   // by ascending load offsets. This can help cores prefetch if the order of
13364   // loads is more predictable.
13365   auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13366     // Check if two reductions are known to load data where one is before/after
13367     // another. Return negative if N0 loads data before N1, positive if N1 is
13368     // before N0 and 0 otherwise if nothing is known.
13369     auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13370       // Look through to the first operand of a MUL, for the VMLA case.
13371       // Currently only looks at the first operand, in the hope they are equal.
13372       if (N0.getOpcode() == ISD::MUL)
13373         N0 = N0.getOperand(0);
13374       if (N1.getOpcode() == ISD::MUL)
13375         N1 = N1.getOperand(0);
13376 
13377       // Return true if the two operands are loads to the same object and the
13378       // offset of the first is known to be less than the offset of the second.
13379       LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13380       LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13381       if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13382           !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13383           Load1->isIndexed())
13384         return 0;
13385 
13386       auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13387       auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13388 
      // Only comparable when both decompose to the same base with known
      // constant offsets.
13389       if (!BaseLocDecomp0.getBase() ||
13390           BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13391           !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13392         return 0;
13393       if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13394         return -1;
13395       if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13396         return 1;
13397       return 0;
13398     };
13399 
    // Peel N0 apart into X (the non-reduction addend) and N0 (the reduction
    // that loads later data), so the adds can be reordered below.
13400     SDValue X;
13401     if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13402       if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13403         int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13404                                           N0.getOperand(1).getOperand(0));
13405         if (IsBefore < 0) {
13406           X = N0.getOperand(0);
13407           N0 = N0.getOperand(1);
13408         } else if (IsBefore > 0) {
13409           X = N0.getOperand(1);
13410           N0 = N0.getOperand(0);
13411         } else
13412           return SDValue();
13413       } else if (IsVecReduce(N0.getOperand(0))) {
13414         X = N0.getOperand(1);
13415         N0 = N0.getOperand(0);
13416       } else if (IsVecReduce(N0.getOperand(1))) {
13417         X = N0.getOperand(0);
13418         N0 = N0.getOperand(1);
13419       } else
13420         return SDValue();
13421     } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13422                IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13423       // Note this is backward to how you would expect. We create
13424       // add(reduce(load + 16), reduce(load + 0)) so that the
13425       // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving
13426       // the X as VADDV(load + 0)
13427       return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13428     } else
13429       return SDValue();
13430 
13431     if (!IsVecReduce(N0) || !IsVecReduce(N1))
13432       return SDValue();
13433 
    // Bail out unless N1's load is strictly before N0's.
13434     if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13435       return SDValue();
13436 
13437     // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13438     SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13439     return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13440   };
13441   if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13442     return R;
13443   if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13444     return R;
13445   return SDValue();
13446 }
13447
13449                                   const ARMSubtarget *Subtarget) {
  // MVE only: push i64 adds above VADDLV/VMLALV style reductions.
13450   if (!Subtarget->hasMVEIntegerOps())
13451     return SDValue();
13452 
13454     return R;
13455 
13456   EVT VT = N->getValueType(0);
13457   SDValue N0 = N->getOperand(0);
13458   SDValue N1 = N->getOperand(1);
13459   SDLoc dl(N);
13460 
13461   if (VT != MVT::i64)
13462     return SDValue();
13463 
13464   // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13465   // will look like:
13466   // t1: i32,i32 = ARMISD::VADDLVs x
13467   // t2: i64 = build_pair t1, t1:1
13468   // t3: i64 = add t2, y
13469   // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13470   // the add to be simplified separately.
13471   // We also need to check for sext / zext and commutative adds.
13472   auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13473                            SDValue NB) {
    // NB must be the build_pair of both halves of one reduction result
    // (result 0 in the low half, result 1 in the high half).
13474     if (NB->getOpcode() != ISD::BUILD_PAIR)
13475       return SDValue();
13476     SDValue VecRed = NB->getOperand(0);
13477     if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13478         VecRed.getResNo() != 0 ||
13479         NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13480       return SDValue();
13481 
13482     if (VecRed->getOpcode() == OpcodeA) {
13483       // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13484       SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13485                                 VecRed.getOperand(0), VecRed.getOperand(1));
13486       NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13487     }
13488 
    // Split the i64 accumulator into the two i32 halves the accumulating
    // reduction node expects as its first operands.
13490     std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13491 
    // Copy the remaining reduction operands (skipping the old accumulator
    // halves when the input was already the accumulating form).
13492     unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13493     for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13494       Ops.push_back(VecRed->getOperand(I));
13495     SDValue Red =
13496         DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13497     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13498                        SDValue(Red.getNode(), 1));
13499   };
13500 
  // Try every reduction flavour (signed/unsigned, predicated, VMLALV) with
  // both operand orders, since add is commutative.
13501   if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13502     return M;
13503   if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13504     return M;
13505   if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13506     return M;
13507   if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13508     return M;
13509   if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13510     return M;
13511   if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13512     return M;
13513   if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13514     return M;
13515   if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13516     return M;
13517   if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13518     return M;
13519   if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13520     return M;
13521   if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13522     return M;
13523   if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13524     return M;
13525   if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13526     return M;
13527   if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13528     return M;
13529   if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13530     return M;
13531   if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13532     return M;
13533   return SDValue();
13534 }
13535
13536 bool
13538                                                   CombineLevel Level) const {
13539   assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13540           N->getOpcode() == ISD::SRL) &&
13541          "Expected shift op");
13542 
  // Don't commute if the shifted value has other users; the transform would
  // duplicate work rather than simplify it.
13543   SDValue ShiftLHS = N->getOperand(0);
13544   if (!ShiftLHS->hasOneUse())
13545     return false;
13546 
13547   if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
13548       !ShiftLHS.getOperand(0)->hasOneUse())
13549     return false;
13550 
13551   if (Level == BeforeLegalizeTypes)
13552     return true;
13553 
13554   if (N->getOpcode() != ISD::SHL)
13555     return true;
13556 
13557   if (Subtarget->isThumb1Only()) {
13558     // Avoid making expensive immediates by commuting shifts. (This logic
13559     // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13560     // for free.)
13561     if (N->getOpcode() != ISD::SHL)
13562       return true;
13563     SDValue N1 = N->getOperand(0);
13564     if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13565         N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13566       return true;
    // Small positive constants (and small negative ADD constants) stay cheap
    // after shifting, so commuting is not desirable for them.
13567     if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13568       if (Const->getAPIntValue().ult(256))
13569         return false;
13570       if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13571           Const->getAPIntValue().sgt(-256))
13572         return false;
13573     }
13574     return true;
13575   }
13576 
13577   // Turn off commute-with-shift transform after legalization, so it doesn't
13578   // conflict with PerformSHLSimplify. (We could try to detect when
13579   // PerformSHLSimplify would trigger more precisely, but it isn't
13580   // really necessary.)
13581   return false;
13582 }
13583
13585                                              const SDNode *N) const {
13586   assert(N->getOpcode() == ISD::XOR &&
13587          (N->getOperand(0).getOpcode() == ISD::SHL ||
13588           N->getOperand(0).getOpcode() == ISD::SRL) &&
13589          "Expected XOR(SHIFT) pattern");
13590 
13591   // Only commute if the entire NOT mask is a hidden shifted mask.
13592   auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13593   auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13594   if (XorC && ShiftC) {
13595     unsigned MaskIdx, MaskLen;
13596     if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13597       unsigned ShiftAmt = ShiftC->getZExtValue();
13598       unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
      // For SHL the mask must cover exactly the bits above the shift amount;
      // for SRL, exactly the bits below the (shifted-out) top bits.
13599       if (N->getOperand(0).getOpcode() == ISD::SHL)
13600         return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13601       return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13602     }
13603   }
13604 
13605   return false;
13606 }
13607
13609                                              const SDNode *N) const {
13610   assert(((N->getOpcode() == ISD::SHL &&
13611            N->getOperand(0).getOpcode() == ISD::SRL) ||
13612           (N->getOpcode() == ISD::SRL &&
13613            N->getOperand(0).getOpcode() == ISD::SHL)) &&
13614          "Expected shift-shift mask");
13615 
  // Outside Thumb1 a shift+mask is generally profitable to fold.
13616   if (!Subtarget->isThumb1Only())
13617     return true;
13618 
  // On Thumb1, only fold for types wider than 32 bits; narrow masks would
  // need expensive immediate materialization.
13619   EVT VT = N->getValueType(0);
13620   if (VT.getScalarSizeInBits() > 32)
13621     return true;
13622 
13623   return false;
13624 }
13625
13627     unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
13628     SDValue Y) const {
  // Only profitable for MVE vector selects on legal types, where the result
  // can become a predicated operation.
13629   return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT) &&
13630          SelectOpcode == ISD::VSELECT;
13631 }
13632
  // NOTE(review): the signature of this hook is not visible in this chunk;
  // from the body it takes an EVT VT and queries subtarget features.
13634   if (!Subtarget->hasNEON()) {
    // Without NEON: Thumb1 only handles scalar sizes up to 32 bits; other
    // subtargets always answer true.
13635     if (Subtarget->isThumb1Only())
13636       return VT.getScalarSizeInBits() <= 32;
13637     return true;
13638   }
  // With NEON, only scalar integer types qualify.
13639   return VT.isScalarInteger();
13640 }
13641
13643                                              EVT VT) const {
  // The saturating conversion must already be legal/custom for VT, and the
  // FP source type must be a simple MVT we can switch over.
13644   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13645     return false;
13646 
  // Gate each FP type on the matching FP/MVE feature set.
13647   switch (FPVT.getSimpleVT().SimpleTy) {
13648   case MVT::f16:
13649     return Subtarget->hasVFP2Base();
13650   case MVT::f32:
13651     return Subtarget->hasVFP2Base();
13652   case MVT::f64:
13653     return Subtarget->hasFP64();
13654   case MVT::v4f32:
13655   case MVT::v8f16:
13656     return Subtarget->hasMVEFloatOps();
13657   default:
13658     return false;
13659   }
13660 }
13661
13664                                   const ARMSubtarget *ST) {
13665   // Allow the generic combiner to identify potential bswaps.
13666   if (DCI.isBeforeLegalize())
13667     return SDValue();
13668 
13669   // DAG combiner will fold:
13670   // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13671   // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13672   // Other code patterns that can be also be modified have the following form:
13673   // b + ((a << 1) | 510)
13674   // b + ((a << 1) & 510)
13675   // b + ((a << 1) ^ 510)
13676   // b + ((a << 1) + 510)
13677 
13678   // Many instructions can perform the shift for free, but it requires both
13679   // the operands to be registers. If c1 << c2 is too large, a mov immediate
13680   // instruction will be needed. So, unfold back to the original pattern if:
13681   // - if c1 and c2 are small enough that they don't require mov imms.
13682   // - the user(s) of the node can perform an shl
13683 
13684   // No shifted operands for 16-bit instructions.
13685   if (ST->isThumb() && ST->isThumb1Only())
13686     return SDValue();
13687 
13688   // Check that all the users could perform the shl themselves.
13689   for (auto *U : N->users()) {
13690     switch(U->getOpcode()) {
13691     default:
13692       return SDValue();
13693     case ISD::SUB:
13694     case ISD::ADD:
13695     case ISD::AND:
13696     case ISD::OR:
13697     case ISD::XOR:
13698     case ISD::SETCC:
13699     case ARMISD::CMP:
13700       // Check that the user isn't already using a constant because there
13701       // aren't any instructions that support an immediate operand and a
13702       // shifted operand.
13703       if (isa<ConstantSDNode>(U->getOperand(0)) ||
13704           isa<ConstantSDNode>(U->getOperand(1)))
13705         return SDValue();
13706 
13707       // Check that it's not already using a shift.
13708       if (U->getOperand(0).getOpcode() == ISD::SHL ||
13709           U->getOperand(1).getOpcode() == ISD::SHL)
13710         return SDValue();
13711       break;
13712     }
13713   }
13714 
  // Match N = binop(shl(x, c2), c1shlc2), where binop is one of the forms
  // listed above.
13715   if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13716       N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13717     return SDValue();
13718 
13719   if (N->getOperand(0).getOpcode() != ISD::SHL)
13720     return SDValue();
13721 
13722   SDValue SHL = N->getOperand(0);
13723 
13724   auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13725   auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13726   if (!C1ShlC2 || !C2)
13727     return SDValue();
13728 
13729   APInt C2Int = C2->getAPIntValue();
13730   APInt C1Int = C1ShlC2->getAPIntValue();
  // Reject out-of-range (undefined) shift amounts.
13731   unsigned C2Width = C2Int.getBitWidth();
13732   if (C2Int.uge(C2Width))
13733     return SDValue();
13734   uint64_t C2Value = C2Int.getZExtValue();
13735 
13736   // Check that performing a lshr will not lose any information.
13737   APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13738   if ((C1Int & Mask) != C1Int)
13739     return SDValue();
13740 
13741   // Shift the first constant.
13742   C1Int.lshrInPlace(C2Int);
13743 
13744   // The immediates are encoded as an 8-bit value that can be rotated.
13745   auto LargeImm = [](const APInt &Imm) {
13746     unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13747     return Imm.getBitWidth() - Zeros > 8;
13748   };
13749 
13750   if (LargeImm(C1Int) || LargeImm(C2Int))
13751     return SDValue();
13752 
  // Rebuild as shl(binop(x, c1 >> c2), c2), i.e. the original un-folded form.
13753   SelectionDAG &DAG = DCI.DAG;
13754   SDLoc dl(N);
13755   SDValue X = SHL.getOperand(0);
13756   SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13757                               DAG.getConstant(C1Int, dl, MVT::i32));
13758   // Shift left to compensate for the lshr of C1Int.
13759   SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
13760 
13761   LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
13762              SHL.dump(); N->dump());
13763   LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
13764   return Res;
13765 }
13766
13767
13768 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
13769 ///
13772                                  const ARMSubtarget *Subtarget) {
13773   SDValue N0 = N->getOperand(0);
13774   SDValue N1 = N->getOperand(1);
13775 
13776   // Only works one way, because it needs an immediate operand.
13777   if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13778     return Result;
13779 
  // Reassociate adds of MVE long reductions (VADDLV/VMLALV).
13780   if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
13781     return Result;
13782 
13783   // First try with the default operand order.
13784   if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
13785     return Result;
13786 
13787   // If that didn't work, try again with the operands commuted.
13788   return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
13789 }
13790
13791 // Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
13792 // providing -X is as cheap as X (currently, just a constant).
13794   if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
13795     return SDValue();
13796   SDValue CSINC = N->getOperand(1);
13797   if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
13798     return SDValue();
13799 
13801   if (!X)
13802     return SDValue();
13803 
  // Emit csinv(0 - X, Y, CC), reusing the CSINC's condition operands.
13804   return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
13805                      DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
13806                                  CSINC.getOperand(0)),
13807                      CSINC.getOperand(1), CSINC.getOperand(2),
13808                      CSINC.getOperand(3));
13809 }
13810
  // True for an explicit integer negation, i.e. (sub 0, x).
13812   return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
13813 }
13814
13815 // Try to fold
13816 //
13817 // (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
13818 //
13819 // The folding helps cmov to be matched with csneg without generating
13820 // redundant neg instruction.
13822   if (!isNegatedInteger(SDValue(N, 0)))
13823     return SDValue();
13824 
  // The negated value must be a single-use CMOV so it can be rewritten.
13825   SDValue CMov = N->getOperand(1);
13826   if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
13827     return SDValue();
13828 
13829   SDValue N0 = CMov.getOperand(0);
13830   SDValue N1 = CMov.getOperand(1);
13831 
13832   // If neither of them are negations, it's not worth the folding as it
13833   // introduces two additional negations while reducing one negation.
13834   if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
13835     return SDValue();
13836 
13837   SDLoc DL(N);
13838   EVT VT = CMov.getValueType();
13839 
  // Push the negation into both CMOV arms, keeping the condition operands.
13840   SDValue N0N = DAG.getNegative(N0, DL, VT);
13841   SDValue N1N = DAG.getNegative(N1, DL, VT);
13842   return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
13843                      CMov.getOperand(3));
13844 }
13845
13846 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
13847 ///
13850                                  const ARMSubtarget *Subtarget) {
13851   SDValue N0 = N->getOperand(0);
13852   SDValue N1 = N->getOperand(1);
13853 
13854   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
13855   if (N1.getNode()->hasOneUse())
13856     if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
13857       return Result;
13858 
  // (sub 0, csinc) -> csinv.
13859   if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
13860     return R;
13861 
  // (neg (cmov X, Y)) -> (cmov (neg X), (neg Y)).
13862   if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
13863     return Val;
13864 
  // The remaining fold is MVE-only and applies to vector subtracts.
13865   if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
13866     return SDValue();
13867 
13868   // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
13869   // so that we can readily pattern match more mve instructions which can use
13870   // a scalar operand.
13871   SDValue VDup = N->getOperand(1);
13872   if (VDup->getOpcode() != ARMISD::VDUP)
13873     return SDValue();
13874 
13875   SDValue VMov = N->getOperand(0);
13876   if (VMov->getOpcode() == ISD::BITCAST)
13877     VMov = VMov->getOperand(0);
13878 
13879   if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
13880     return SDValue();
13881 
  // Negate the scalar and splat the result instead.
13882   SDLoc dl(N);
13883   SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
13884                                    DCI.DAG.getConstant(0, dl, MVT::i32),
13885                                    VDup->getOperand(0));
13886   return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
13887 }
13888
13889 /// PerformVMULCombine
13890 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
13891 /// special multiplier accumulator forwarding.
13892 /// vmul d3, d0, d2
13893 /// vmla d3, d1, d2
13894 /// is faster than
13895 /// vadd d3, d0, d1
13896 /// vmul d3, d3, d2
13897 // However, for (A + B) * (A + B),
13898 // vadd d2, d0, d1
13899 // vmul d3, d0, d2
13900 // vmla d3, d1, d2
13901 // is slower than
13902 // vadd d2, d0, d1
13903 // vmul d3, d2, d2
13906                                           const ARMSubtarget *Subtarget) {
  // This transform is only a win on cores with VMLx forwarding.
13907   if (!Subtarget->hasVMLxForwarding())
13908     return SDValue();
13909 
13910   SelectionDAG &DAG = DCI.DAG;
13911   SDValue N0 = N->getOperand(0);
13912   SDValue N1 = N->getOperand(1);
  // Find the add/sub operand; swap so N0 is the add/sub and N1 the
  // multiplier.
13913   unsigned Opcode = N0.getOpcode();
13914   if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
13915       Opcode != ISD::FADD && Opcode != ISD::FSUB) {
13916     Opcode = N1.getOpcode();
13917     if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
13918         Opcode != ISD::FADD && Opcode != ISD::FSUB)
13919       return SDValue();
13920     std::swap(N0, N1);
13921   }
13922 
  // Skip the (A + B) * (A + B) case described above.
13923   if (N0 == N1)
13924     return SDValue();
13925 
13926   EVT VT = N->getValueType(0);
13927   SDLoc DL(N);
13928   SDValue N00 = N0->getOperand(0);
13929   SDValue N01 = N0->getOperand(1);
  // (A op B) * C -> (A * C) op (B * C), where op is the add/sub found above.
13930   return DAG.getNode(Opcode, DL, VT,
13931                      DAG.getNode(ISD::MUL, DL, VT, N00, N1),
13932                      DAG.getNode(ISD::MUL, DL, VT, N01, N1));
13933 }
13934
13936                                        const ARMSubtarget *Subtarget) {
  // Match v2i64 multiplies of sign/zero-extended v4i32 halves and turn them
  // into the MVE long multiply nodes (VMULLs/VMULLu).
13937   EVT VT = N->getValueType(0);
13938   if (VT != MVT::v2i64)
13939     return SDValue();
13940 
13941   SDValue N0 = N->getOperand(0);
13942   SDValue N1 = N->getOperand(1);
13943 
  // Returns the un-extended input when Op is a 32-bit sign_extend_inreg.
13944   auto IsSignExt = [&](SDValue Op) {
13945     if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
13946       return SDValue();
13947     EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
13948     if (VT.getScalarSizeInBits() == 32)
13949       return Op->getOperand(0);
13950     return SDValue();
13951   };
13952   auto IsZeroExt = [&](SDValue Op) {
13953     // Zero extends are a little more awkward. At the point we are matching
13954     // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
13955     // That might be before or after a bitcast depending on how the and is
13956     // placed. Because this has to look through bitcasts, it is currently only
13957     // supported on LE.
13958     if (!Subtarget->isLittle())
13959       return SDValue();
13960 
13961     SDValue And = Op;
13962     if (And->getOpcode() == ISD::BITCAST)
13963       And = And->getOperand(0);
13964     if (And->getOpcode() != ISD::AND)
13965       return SDValue();
13966     SDValue Mask = And->getOperand(1);
13967     if (Mask->getOpcode() == ISD::BITCAST)
13968       Mask = Mask->getOperand(0);
13969 
13970     if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
13971         Mask.getValueType() != MVT::v4i32)
13972       return SDValue();
    // (-1, 0, -1, 0) keeps the low 32 bits of each 64-bit lane on LE.
13973     if (isAllOnesConstant(Mask->getOperand(0)) &&
13974         isNullConstant(Mask->getOperand(1)) &&
13975         isAllOnesConstant(Mask->getOperand(2)) &&
13976         isNullConstant(Mask->getOperand(3)))
13977       return And->getOperand(0);
13978     return SDValue();
13979   };
13980 
  // Both operands must be extended the same way for a single VMULL.
13981   SDLoc dl(N);
13982   if (SDValue Op0 = IsSignExt(N0)) {
13983     if (SDValue Op1 = IsSignExt(N1)) {
13984       SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
13985       SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
13986       return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
13987     }
13988   }
13989   if (SDValue Op0 = IsZeroExt(N0)) {
13990     if (SDValue Op1 = IsZeroExt(N1)) {
13991       SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
13992       SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
13993       return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
13994     }
13995   }
13996 
13997   return SDValue();
13998 }
13999
14002                                   const ARMSubtarget *Subtarget) {
14003   SelectionDAG &DAG = DCI.DAG;
14004 
  // v2i64 MVE multiplies get their own VMULL matching first.
14005   EVT VT = N->getValueType(0);
14006   if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14007     return PerformMVEVMULLCombine(N, DAG, Subtarget);
14008 
14009   if (Subtarget->isThumb1Only())
14010     return SDValue();
14011 
14012   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14013     return SDValue();
14014 
14015   if (VT.is64BitVector() || VT.is128BitVector())
14016     return PerformVMULCombine(N, DCI, Subtarget);
14017   if (VT != MVT::i32)
14018     return SDValue();
14019 
  // The remaining folds rewrite (mul x, C) into shift/add/sub sequences.
14020   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14021   if (!C)
14022     return SDValue();
14023 
  // Factor out trailing zero bits of the multiplier; they become one final
  // left shift after the add/sub form is built.
14024   int64_t MulAmt = C->getSExtValue();
14025   unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14026 
14027   ShiftAmt = ShiftAmt & (32 - 1);
14028   SDValue V = N->getOperand(0);
14029   SDLoc DL(N);
14030 
14031   SDValue Res;
14032   MulAmt >>= ShiftAmt;
14033 
14034   if (MulAmt >= 0) {
14035     if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14036       // (mul x, 2^N + 1) => (add (shl x, N), x)
14037       Res = DAG.getNode(ISD::ADD, DL, VT,
14038                         V,
14039                         DAG.getNode(ISD::SHL, DL, VT,
14040                                     V,
14041                                     DAG.getConstant(Log2_32(MulAmt - 1), DL,
14042                                                     MVT::i32)));
14043     } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14044       // (mul x, 2^N - 1) => (sub (shl x, N), x)
14045       Res = DAG.getNode(ISD::SUB, DL, VT,
14046                         DAG.getNode(ISD::SHL, DL, VT,
14047                                     V,
14048                                     DAG.getConstant(Log2_32(MulAmt + 1), DL,
14049                                                     MVT::i32)),
14050                         V);
14051     } else
14052       return SDValue();
14053   } else {
14054     uint64_t MulAmtAbs = -MulAmt;
14055     if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14056       // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14057       Res = DAG.getNode(ISD::SUB, DL, VT,
14058                         V,
14059                         DAG.getNode(ISD::SHL, DL, VT,
14060                                     V,
14061                                     DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14062                                                     MVT::i32)));
14063     } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14064       // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14065       Res = DAG.getNode(ISD::ADD, DL, VT,
14066                         V,
14067                         DAG.getNode(ISD::SHL, DL, VT,
14068                                     V,
14069                                     DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14070                                                     MVT::i32)));
      // Negate the add to complete -(add (shl x, N), x).
14071       Res = DAG.getNode(ISD::SUB, DL, VT,
14072                         DAG.getConstant(0, DL, MVT::i32), Res);
14073     } else
14074       return SDValue();
14075   }
14076 
  // Reapply the trailing-zero shift factored out above.
14077   if (ShiftAmt != 0)
14078     Res = DAG.getNode(ISD::SHL, DL, VT,
14079                       Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14080 
14081   // Do not add new nodes to DAG combiner worklist.
14082   DCI.CombineTo(N, Res, false);
14083   return SDValue();
14084 }
14085
// CombineANDShift (Thumb1 only) - Combine "(and (shl/srl x, c2), c1)" into a
// pair of shifts, or shift the mask, when that avoids materializing the
// expensive constant c1.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                              const ARMSubtarget *Subtarget) {
  // Allow DAGCombine to pattern-match before we touch the canonical form.
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  // Only i32 AND nodes are handled.
  if (N->getValueType(0) != MVT::i32)
    return SDValue();

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N1C)
    return SDValue();

  uint32_t C1 = (uint32_t)N1C->getZExtValue();
  // Don't transform uxtb/uxth.
  if (C1 == 255 || C1 == 65535)
    return SDValue();

  SDNode *N0 = N->getOperand(0).getNode();
  if (!N0->hasOneUse())
    return SDValue();

  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
    return SDValue();

  bool LeftShift = N0->getOpcode() == ISD::SHL;

  // NOTE(review): the declaration of N01C (presumably a dyn_cast of
  // N0->getOperand(1) to ConstantSDNode) was lost in extraction — restore it
  // before this will compile.
  if (!N01C)
    return SDValue();

  uint32_t C2 = (uint32_t)N01C->getZExtValue();
  if (!C2 || C2 >= 32)
    return SDValue();

  // Clear irrelevant bits in the mask.
  if (LeftShift)
    C1 &= (-1U << C2);
  else
    C1 &= (-1U >> C2);

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // We have a pattern of the form "(and (shl x, c2) c1)" or
  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
  // transform to a pair of shifts, to save materializing c1.

  // First pattern: right shift, then mask off leading bits.
  // FIXME: Use demanded bits?
  if (!LeftShift && isMask_32(C1)) {
    uint32_t C3 = llvm::countl_zero(C1);
    if (C2 < C3) {
      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C3 - C2, DL, MVT::i32));
      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // First pattern, reversed: left shift, then mask off trailing bits.
  if (LeftShift && isMask_32(~C1)) {
    uint32_t C3 = llvm::countr_zero(C1);
    if (C2 < C3) {
      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C3 - C2, DL, MVT::i32));
      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Second pattern: left shift, then mask off leading bits.
  // FIXME: Use demanded bits?
  if (LeftShift && isShiftedMask_32(C1)) {
    uint32_t Trailing = llvm::countr_zero(C1);
    uint32_t C3 = llvm::countl_zero(C1);
    if (Trailing == C2 && C2 + C3 < 32) {
      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C2 + C3, DL, MVT::i32));
      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Second pattern, reversed: right shift, then mask off trailing bits.
  // FIXME: Handle other patterns of known/demanded bits.
  if (!LeftShift && isShiftedMask_32(C1)) {
    uint32_t Leading = llvm::countl_zero(C1);
    uint32_t C3 = llvm::countr_zero(C1);
    if (Leading == C2 && C2 + C3 < 32) {
      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C2 + C3, DL, MVT::i32));
      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
  // if "c1 >> c2" is a cheaper immediate than "c1"
  if (LeftShift &&
      HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {

    SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
                              DAG.getConstant(C1 >> C2, DL, MVT::i32));
    return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
                       DAG.getConstant(C2, DL, MVT::i32));
  }

  return SDValue();
}
14197
// PerformANDCombine - Target combine for ISD::AND: first try the
// immediate-form VBIC on NEON/MVE constant splats, then fall back to the
// generic select/shift simplifications.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                    const ARMSubtarget *Subtarget) {
  // Attempt to use immediate-form VBIC
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  // MVE predicate types (vNi1) are excluded from the VBIC path.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
      VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
    return SDValue();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
        SplatBitSize == 64) {
      EVT VbicVT;
      // AND with C is VBIC with ~C, hence the inverted splat bits.
      SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, dl, VbicVT, VT, OtherModImm);
      if (Val.getNode()) {
        SDValue Input =
            DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
      }
    }
  }

  if (!Subtarget->isThumb1Only()) {
    // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
    if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
      return Result;

    if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
      return Result;
  }

  if (Subtarget->isThumb1Only())
    if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
      return Result;

  return SDValue();
}
14246
// Try combining OR nodes to SMULWB, SMULWT.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                         const ARMSubtarget *Subtarget) {
  // SMULW[B|T] needs V6 DSP capability (on Thumb, Thumb2 + DSP).
  if (!Subtarget->hasV6Ops() ||
      (Subtarget->isThumb() &&
       (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
    return SDValue();

  SDValue SRL = OR->getOperand(0);
  SDValue SHL = OR->getOperand(1);

  // The OR operands may come in either order; canonicalize so SRL/SHL name
  // the matching shift nodes.
  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
    SRL = OR->getOperand(1);
    SHL = OR->getOperand(0);
  }
  if (!isSRL16(SRL) || !isSHL16(SHL))
    return SDValue();

  // The first operands to the shifts need to be the two results from the
  // same smul_lohi node.
  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
      SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
    return SDValue();

  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
    return SDValue();

  // Now we have:
  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
  // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
  // For SMUWB the 16-bit value will signed extended somehow.
  // For SMULWT only the SRA is required.
  // Check both sides of SMUL_LOHI
  SDValue OpS16 = SMULLOHI->getOperand(0);
  SDValue OpS32 = SMULLOHI->getOperand(1);

  SelectionDAG &DAG = DCI.DAG;
  // If operand 0 isn't the 16-bit side, swap the roles.
  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
    OpS16 = OpS32;
    OpS32 = SMULLOHI->getOperand(0);
  }

  SDLoc dl(OR);
  unsigned Opcode = 0;
  if (isS16(OpS16, DAG))
    Opcode = ARMISD::SMULWB;
  else if (isSRA16(OpS16)) {
    // Top-half operand: look through the SRA.
    Opcode = ARMISD::SMULWT;
    OpS16 = OpS16->getOperand(0);
  }
  else
    return SDValue();

  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
  // Returning the (now replaced) node tells the combiner N is dead.
  return SDValue(OR, 0);
}
14307
// PerformORCombineToBFI - Try to turn "(or (and ...), ...)" patterns into
// the ARM/Thumb2 BFI (bitfield insert) node; see the numbered cases below.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                     const ARMSubtarget *Subtarget) {
  // BFI is only available on V6T2+
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //      iff (val & mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && mask == ~mask2
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && ~mask == mask2
  //  (i.e., copy a bitfield value into another bitfield of the same width)

  if (VT != MVT::i32)
    return SDValue();

  SDValue N00 = N0.getOperand(0);

  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set. If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  SDValue MaskOp = N0.getOperand(1);
  // NOTE(review): the declaration of MaskC (presumably a dyn_cast of MaskOp
  // to ConstantSDNode) was lost in extraction — restore before compiling.
  if (!MaskC)
    return SDValue();
  unsigned Mask = MaskC->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  // NOTE(review): the declaration of N1C (presumably a dyn_cast of N1 to
  // ConstantSDNode) was lost in extraction — restore before compiling.
  if (N1C) {
    unsigned Val = N1C->getZExtValue();
    if ((Val & ~Mask) != Val)
      return SDValue();

    if (ARM::isBitFieldInvertedMask(Mask)) {
      Val >>= llvm::countr_zero(~Mask);

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
                        DAG.getConstant(Val, DL, MVT::i32),
                        DAG.getConstant(Mask, DL, MVT::i32));

      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner than N is
      // now dead.
      return SDValue(N, 0);
    }
  } else if (N1.getOpcode() == ISD::AND) {
    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
    // NOTE(review): the declaration of N11C (presumably a dyn_cast of
    // N1.getOperand(1) to ConstantSDNode) was lost in extraction.
    if (!N11C)
      return SDValue();
    unsigned Mask2 = N11C->getZExtValue();

    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
    // as is to match.
    if (ARM::isBitFieldInvertedMask(Mask) &&
        (Mask == ~Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a
      unsigned amt = llvm::countr_zero(Mask2);
      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                        DAG.getConstant(amt, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                        DAG.getConstant(Mask, DL, MVT::i32));
      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner than N is
      // now dead.
      return SDValue(N, 0);
    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
               (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b
      unsigned lsb = llvm::countr_zero(Mask);
      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                        DAG.getConstant(lsb, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                        DAG.getConstant(Mask2, DL, MVT::i32));
      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner than N is
      // now dead.
      return SDValue(N, 0);
    }
  }

  // NOTE(review): one conjunct of this condition (line between the isa<> test
  // and the comment) was lost in extraction.
  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
    // where lsb(mask) == #shamt and masked bits of B are known zero.
    SDValue ShAmt = N00.getOperand(1);
    unsigned ShAmtC = ShAmt->getAsZExtVal();
    unsigned LSB = llvm::countr_zero(Mask);
    if (ShAmtC != LSB)
      return SDValue();

    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
                      DAG.getConstant(~Mask, DL, MVT::i32));

    DCI.CombineTo(N, Res, false);
    // Return value from the original node to inform the combiner than N is
    // now dead.
    return SDValue(N, 0);
  }

  return SDValue();
}
14433
14434static bool isValidMVECond(unsigned CC, bool IsFloat) {
14435 switch (CC) {
14436 case ARMCC::EQ:
14437 case ARMCC::NE:
14438 case ARMCC::LE:
14439 case ARMCC::GT:
14440 case ARMCC::GE:
14441 case ARMCC::LT:
14442 return true;
14443 case ARMCC::HS:
14444 case ARMCC::HI:
14445 return !IsFloat;
14446 default:
14447 return false;
14448 };
14449}
14450
// Extract the ARMCC condition-code operand from a VCMP (operand 2) or
// VCMPZ (operand 1) node.
// NOTE(review): this function's signature line (returning ARMCC::CondCodes
// and taking the node) was lost in extraction.
  if (N->getOpcode() == ARMISD::VCMP)
    return (ARMCC::CondCodes)N->getConstantOperandVal(2);
  else if (N->getOpcode() == ARMISD::VCMPZ)
    return (ARMCC::CondCodes)N->getConstantOperandVal(1);
  else
    llvm_unreachable("Not a VCMP/VCMPZ!");
}
14459
// Whether the VCMP/VCMPZ node's condition can be inverted and still be a
// valid MVE condition for the compared type.
// NOTE(review): this function's signature and the declaration of CC (the
// opposite of the node's condition code) were lost in extraction.
  return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
}
14464
// PerformORCombine_i1 - For MVE predicate vectors, rewrite "or A, B" as
// "not (and (not A), (not B))" when at least one operand is a VCMP/VCMPZ
// whose condition can be freely inverted; ANDs of predicates chain better.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                       const ARMSubtarget *Subtarget) {
  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
  // together with predicates
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  auto IsFreelyInvertable = [&](SDValue V) {
    if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
      return CanInvertMVEVCMP(V);
    return false;
  };

  // At least one operand must be freely invertable.
  if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
    return SDValue();

  SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
  SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
  return DAG.getLogicalNOT(DL, And, VT);
}
14489
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                    const ARMSubtarget *Subtarget) {
  // Attempt to use immediate-form VORR
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // MVE predicate vectors get the invert-to-AND treatment instead.
  if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
                                        VT == MVT::v8i1 || VT == MVT::v16i1))
    return PerformORCombine_i1(N, DAG, Subtarget);

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
        SplatBitSize == 64) {
      EVT VorrVT;
      SDValue Val =
          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
                            SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
      if (Val.getNode()) {
        SDValue Input =
            DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
      }
    }
  }

  if (!Subtarget->isThumb1Only()) {
    // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
      return Result;
    if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
      return Result;
  }

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
  // NOTE(review): the final conjunct of this condition (one line) was lost
  // in extraction.
  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&

    // The code below optimizes (or (and X, Y), Z).
    // The AND operand needs to have a single user to make these optimizations
    // profitable.
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
      return SDValue();

    APInt SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;

    APInt SplatBits0, SplatBits1;
    // NOTE(review): the declarations of BVN0 and BVN1 (presumably dyn_casts
    // of the ANDs' second operands to BuildVectorSDNode) were lost in
    // extraction.
    // Ensure that the second operand of both ands are constants
    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
                                      HasAnyUndefs) && !HasAnyUndefs) {
      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
                                        HasAnyUndefs) && !HasAnyUndefs) {
        // Ensure that the bit width of the constants are the same and that
        // the splat arguments are logical inverses as per the pattern we
        // are trying to simplify.
        if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
            SplatBits0 == ~SplatBits1) {
          // Canonicalize the vector type to make instruction selection
          // simpler.
          EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
          SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
                                       N0->getOperand(1),
                                       N0->getOperand(0),
                                       N1->getOperand(0));
          return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
        }
      }
    }
  }

  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
  // reasonable.
  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
    if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
      return Res;
  }

  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
    return Result;

  return SDValue();
}
14590
// PerformXORCombine - Target combine for ISD::XOR: select/shift
// simplifications, plus folding an MVE (xor (vcmp/z), true) into the
// opposite-condition compare.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                     const ARMSubtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  if (!Subtarget->isThumb1Only()) {
    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
      return Result;

    if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
      return Result;
  }

  if (Subtarget->hasMVEIntegerOps()) {
    // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    const TargetLowering *TLI = Subtarget->getTargetLowering();
    if (TLI->isConstTrueVal(N1) &&
        (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
      if (CanInvertMVEVCMP(N0)) {
        SDLoc DL(N0);
        // NOTE(review): the declarations of CC (the opposite condition code)
        // and of the Ops vector were lost in extraction — restore before
        // compiling.

        Ops.push_back(N0->getOperand(0));
        if (N0->getOpcode() == ARMISD::VCMP)
          Ops.push_back(N0->getOperand(1));
        Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
        return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
      }
    }
  }

  return SDValue();
}
14632
14633// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14634// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14635// their position in "to" (Rd).
14636static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14637 assert(N->getOpcode() == ARMISD::BFI);
14638
14639 SDValue From = N->getOperand(1);
14640 ToMask = ~N->getConstantOperandAPInt(2);
14641 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14642
14643 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14644 // #C in the base of the SHR.
14645 if (From->getOpcode() == ISD::SRL &&
14646 isa<ConstantSDNode>(From->getOperand(1))) {
14647 APInt Shift = From->getConstantOperandAPInt(1);
14648 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14649 FromMask <<= Shift.getLimitedValue(31);
14650 From = From->getOperand(0);
14651 }
14652
14653 return From;
14654}
14655
14656// If A and B contain one contiguous set of bits, does A | B == A . B?
14657//
14658// Neither A nor B must be zero.
14659static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14660 unsigned LastActiveBitInA = A.countr_zero();
14661 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14662 return LastActiveBitInA - 1 == FirstActiveBitInB;
14663}
14664
// Find an adjacent BFI (the operand-0 chain) writing disjoint, contiguous
// bits from the same source value, so the two BFIs can be merged.
// NOTE(review): this function's signature line was lost in extraction.
  // We have a BFI in N. Find a BFI it can combine with, if one exists.
  APInt ToMask, FromMask;
  SDValue From = ParseBFI(N, ToMask, FromMask);
  SDValue To = N->getOperand(0);

  SDValue V = To;
  if (V.getOpcode() != ARMISD::BFI)
    return SDValue();

  APInt NewToMask, NewFromMask;
  SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
  // Both BFIs must insert bits taken from the same value.
  if (NewFrom != From)
    return SDValue();

  // Do the written bits conflict with any we've seen so far?
  if ((NewToMask & ToMask).getBoolValue())
    // Conflicting bits.
    return SDValue();

  // Are the new bits contiguous when combined with the old bits?
  if (BitsProperlyConcatenate(ToMask, NewToMask) &&
      BitsProperlyConcatenate(FromMask, NewFromMask))
    return V;
  if (BitsProperlyConcatenate(NewToMask, ToMask) &&
      BitsProperlyConcatenate(NewFromMask, FromMask))
    return V;

  return SDValue();
}
14695
// PerformBFICombine - Simplify ARMISD::BFI: drop a redundant AND on the
// inserted value, merge two BFIs from the same source, or reassociate a
// chain of BFIs so lower-bit insertions happen first.
// NOTE(review): this function's signature line was lost in extraction.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  if (N1.getOpcode() == ISD::AND) {
    // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
    // the bits being cleared by the AND are not demanded by the BFI.
    // NOTE(review): the declaration of N11C (presumably a dyn_cast of
    // N1.getOperand(1) to ConstantSDNode) was lost in extraction.
    if (!N11C)
      return SDValue();
    unsigned InvMask = N->getConstantOperandVal(2);
    unsigned LSB = llvm::countr_zero(~InvMask);
    unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
    assert(Width <
               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
           "undefined behavior");
    unsigned Mask = (1u << Width) - 1;
    unsigned Mask2 = N11C->getZExtValue();
    if ((Mask & (~Mask2)) == 0)
      return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
                         N->getOperand(0), N1.getOperand(0), N->getOperand(2));
    return SDValue();
  }

  // Look for another BFI to combine with.
  if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
    // We've found a BFI.
    APInt ToMask1, FromMask1;
    SDValue From1 = ParseBFI(N, ToMask1, FromMask1);

    APInt ToMask2, FromMask2;
    SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
    assert(From1 == From2);
    (void)From2;

    // Create a new BFI, combining the two together.
    APInt NewFromMask = FromMask1 | FromMask2;
    APInt NewToMask = ToMask1 | ToMask2;

    EVT VT = N->getValueType(0);
    SDLoc dl(N);

    // If the merged source bits don't start at bit 0, shift them down first.
    if (NewFromMask[0] == 0)
      From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
                          DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
    return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
                       DAG.getConstant(~NewToMask, dl, VT));
  }

  // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
  // that lower bit insertions are performed first, providing that M1 and M2
  // do no overlap. This can allow multiple BFI instructions to be combined
  // together by the other folds above.
  if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
    APInt ToMask1 = ~N->getConstantOperandAPInt(2);
    APInt ToMask2 = ~N0.getConstantOperandAPInt(2);

    if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
        ToMask1.countl_zero() < ToMask2.countl_zero())
      return SDValue();

    EVT VT = N->getValueType(0);
    SDLoc dl(N);
    SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
                               N->getOperand(1), N->getOperand(2));
    return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
                       N0.getOperand(2));
  }

  return SDValue();
}
14767
// Check that N is CMPZ(CSINC(0, 0, CC, X)),
// or CMPZ(CMOV(1, 0, CC, X))
// return X if valid.
// NOTE(review): this function's signature line was lost in extraction; it
// also appears to write the discovered condition into an out-parameter,
// whose assignments below were likewise dropped.
  if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
    return SDValue();
  SDValue CSInc = Cmp->getOperand(0);

  // Ignore any `And 1` nodes that may not yet have been removed. We are
  // looking for a value that produces 1/0, so these have no effect on the
  // code.
  while (CSInc.getOpcode() == ISD::AND &&
         isa<ConstantSDNode>(CSInc.getOperand(1)) &&
         CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
    CSInc = CSInc.getOperand(0);

  if (CSInc.getOpcode() == ARMISD::CSINC &&
      isNullConstant(CSInc.getOperand(0)) &&
      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
    // NOTE(review): the condition-code extraction on this path was lost in
    // extraction.
    return CSInc.getOperand(3);
  }
  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
    // NOTE(review): the condition-code extraction on this path was lost in
    // extraction.
    return CSInc.getOperand(3);
  }
  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
      isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
    // NOTE(review): the (inverted) condition-code extraction on this path
    // was lost in extraction.
    return CSInc.getOperand(3);
  }
  return SDValue();
}
14803
// PerformCMPZCombine - Collapse a CMPZ of a 1/0-producing CSINC back to the
// original flags value when the conditions line up.
// NOTE(review): this function's signature line was lost in extraction.
  // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
  //   t92: flags = ARMISD::CMPZ t74, 0
  //     t93: i32 = ARMISD::CSINC 0, 0, 1, t92
  //   t96: flags = ARMISD::CMPZ t93, 0
  //   t114: i32 = ARMISD::CSINV 0, 0, 0, t96
  // NOTE(review): the declaration of Cond (an ARMCC::CondCodes out-value for
  // IsCMPZCSINC) was lost in extraction.
  if (SDValue C = IsCMPZCSINC(N, Cond))
    if (Cond == ARMCC::EQ)
      return C;
  return SDValue();
}
14816
// PerformCSETCombine - Fold a CSINC/CSxx whose flags come from a redundant
// CMPZ(CSINC ...) so it tests the inner condition directly.
// NOTE(review): this function's signature line was lost in extraction.
  // Fold away an unneccessary CMPZ/CSINC
  // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
  // if C1==EQ -> CSXYZ A, B, C2, D
  // if C1==NE -> CSXYZ A, B, NOT(C2), D
  // NOTE(review): the declaration of Cond (IsCMPZCSINC's out-parameter) was
  // lost in extraction.
  if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
    if (N->getConstantOperandVal(2) == ARMCC::EQ)
      return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
                         N->getOperand(1),
                         DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
    if (N->getConstantOperandVal(2) == ARMCC::NE)
      return DAG.getNode(
          N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
          N->getOperand(1),
          // NOTE(review): the final argument line (presumably the opposite
          // condition constant plus C) was lost in extraction.
  }
  return SDValue();
}
14836
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
// NOTE(review): the first line of this function's signature was lost in
// extraction; only the trailing parameter is visible below.
                                         const ARMSubtarget *Subtarget) {
  // vmovrrd(vmovdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));

  // vmovrrd(load f64) -> (load i32), (load i32)
  SDNode *InNode = InDouble.getNode();
  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
      InNode->getValueType(0) == MVT::f64 &&
      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
      !cast<LoadSDNode>(InNode)->isVolatile()) {
    // TODO: Should this be done for non-FrameIndex operands?
    LoadSDNode *LD = cast<LoadSDNode>(InNode);

    SelectionDAG &DAG = DCI.DAG;
    SDLoc DL(LD);
    SDValue BasePtr = LD->getBasePtr();
    SDValue NewLD1 =
        DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
                    LD->getAlign(), LD->getMemOperand()->getFlags());

    // Second word lives 4 bytes above the base.
    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(4, DL, MVT::i32));

    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
                                 LD->getPointerInfo().getWithOffset(4),
                                 commonAlignment(LD->getAlign(), 4),
                                 LD->getMemOperand()->getFlags());

    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
    if (DCI.DAG.getDataLayout().isBigEndian())
      std::swap (NewLD1, NewLD2);
    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
    return Result;
  }

  // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
  // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
  if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isa<ConstantSDNode>(InDouble.getOperand(1))) {
    SDValue BV = InDouble.getOperand(0);
    // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
    // change lane order under big endian.
    bool BVSwap = BV.getOpcode() == ISD::BITCAST;
    while (
        (BV.getOpcode() == ISD::BITCAST ||
         BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
        (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
      BVSwap = BV.getOpcode() == ISD::BITCAST;
      BV = BV.getOperand(0);
    }
    if (BV.getValueType() != MVT::v4i32)
      return SDValue();

    // Handle buildvectors, pulling out the correct lane depending on
    // endianness.
    unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
    if (BV.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue Op0 = BV.getOperand(Offset);
      SDValue Op1 = BV.getOperand(Offset + 1);
      if (!Subtarget->isLittle() && BVSwap)
        std::swap(Op0, Op1);

      return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
    }

    // A chain of insert_vectors, grabbing the correct value of the chain of
    // inserts.
    SDValue Op0, Op1;
    while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
      if (isa<ConstantSDNode>(BV.getOperand(2))) {
        if (BV.getConstantOperandVal(2) == Offset && !Op0)
          Op0 = BV.getOperand(1);
        if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
          Op1 = BV.getOperand(1);
      }
      BV = BV.getOperand(0);
    }
    if (!Subtarget->isLittle() && BVSwap)
      std::swap(Op0, Op1);
    if (Op0 && Op1)
      return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
  }

  return SDValue();
}
14928
/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
// NOTE(review): this function's signature line (and opening brace) was lost
// in extraction.
  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  // Look through bitcasts on both halves before matching.
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::BITCAST)
    Op1 = Op1.getOperand(0);
  // Both operands must be the two results, in order, of one VMOVRRD.
  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
      Op0.getNode() == Op1.getNode() &&
      Op0.getResNo() == 0 && Op1.getResNo() == 1)
    return DAG.getNode(ISD::BITCAST, SDLoc(N),
                       N->getValueType(0), Op0.getOperand(0));
  return SDValue();
}
14946
// PerformVMOVhrCombine - Simplify ARMISD::VMOVhr: cancel a VMOVrh, fold a
// bitcast CopyFromReg, fold a half-width load, and narrow demanded bits.
// NOTE(review): this function's signature line was lost in extraction.
  SDValue Op0 = N->getOperand(0);

  // VMOVhr (VMOVrh (X)) -> X
  if (Op0->getOpcode() == ARMISD::VMOVrh)
    return Op0->getOperand(0);

  // FullFP16: half values are passed in S-registers, and we don't
  // need any of the bitcast and moves:
  //
  //     t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
  //       t5: i32 = bitcast t2
  //     t18: f16 = ARMISD::VMOVhr t5
  // =>
  //     tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
  if (Op0->getOpcode() == ISD::BITCAST) {
    SDValue Copy = Op0->getOperand(0);
    if (Copy.getValueType() == MVT::f32 &&
        Copy->getOpcode() == ISD::CopyFromReg) {
      // A CopyFromReg may or may not carry a trailing glue operand.
      bool HasGlue = Copy->getNumOperands() == 3;
      SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
                       HasGlue ? Copy->getOperand(2) : SDValue()};
      EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
      SDValue NewCopy =
          // NOTE(review): the getNode call head (presumably
          // DCI.DAG.getNode(ISD::CopyFromReg, ...)) was lost in extraction.
          DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
          ArrayRef(Ops, HasGlue ? 3 : 2));

      // Update Users, Chains, and Potential Glue.
      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
      DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
      if (HasGlue)
        DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
                                          NewCopy.getValue(2));

      return NewCopy;
    }
  }

  // fold (VMOVhr (load x)) -> (load (f16*)x)
  if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
    if (LN0->hasOneUse() && LN0->isUnindexed() &&
        LN0->getMemoryVT() == MVT::i16) {
      SDValue Load =
          DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
                          LN0->getBasePtr(), LN0->getMemOperand());
      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
      DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // Only the bottom 16 bits of the source register are used.
  APInt DemandedMask = APInt::getLowBitsSet(32, 16);
  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
    return SDValue(N, 0);

  return SDValue();
}
15008
// PerformVMOVrhCombine - Simplify ARMISD::VMOVrh: constant-fold FP
// immediates, fold half loads to zext loads, and use VGETLANEu for
// extracts.
// NOTE(review): this function's signature line was lost in extraction.
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (VMOVrh (fpconst x)) -> const x
  // NOTE(review): the enclosing "if (ConstantFPSDNode *C = dyn_cast<...>)"
  // line was lost in extraction.
    APFloat V = C->getValueAPF();
    return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
  }

  // fold (VMOVrh (load x)) -> (zextload (i16*)x)
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);

    SDValue Load =
        DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
                       LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
    return Load;
  }

  // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
  // NOTE(review): the second conjunct of this condition (presumably an
  // isa<ConstantSDNode> test on the lane index) was lost in extraction.
  if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
    return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
                       N0->getOperand(1));

  return SDValue();
}
15039
15040/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15041/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15042/// i64 vector to have f64 elements, since the value can then be loaded
15043/// directly into a VFP register.
// NOTE(review): the signature line (15044) is not visible in this rendering;
// per the doc comment this takes the BUILD_VECTOR SDNode and returns bool.
15045 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15046 for (unsigned i = 0; i < NumElts; ++i) {
15047 SDNode *Elt = N->getOperand(i).getNode();
// A single suitable (normal, non-volatile) load operand is enough to make
// the f64 bitcast profitable.
15048 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15049 return true;
15050 }
15051 return false;
15052}
15053
15054/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15055/// ISD::BUILD_VECTOR.
// NOTE(review): the signature lines (15056-15057) are not visible in this
// rendering; the visible trailing parameter is `const ARMSubtarget *Subtarget`
// and the body also uses `N` and `DCI`.
15058 const ARMSubtarget *Subtarget) {
15059 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15060 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15061 // into a pair of GPRs, which is fine when the value is used as a scalar,
15062 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15063 SelectionDAG &DAG = DCI.DAG;
15064 if (N->getNumOperands() == 2)
15065 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15066 return RV;
15067
15068 // Load i64 elements as f64 values so that type legalization does not split
15069 // them up into i32 values.
15070 EVT VT = N->getValueType(0);
15071 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15072 return SDValue();
15073 SDLoc dl(N);
// NOTE(review): line 15074 (presumably the declaration of the `Ops` vector)
// is missing from this rendering.
15075 unsigned NumElts = VT.getVectorNumElements();
15076 for (unsigned i = 0; i < NumElts; ++i) {
15077 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15078 Ops.push_back(V);
15079 // Make the DAGCombiner fold the bitcast.
15080 DCI.AddToWorklist(V.getNode());
15081 }
// Rebuild the vector with f64 elements and bitcast back, so the loads feed
// VFP registers directly instead of being split into i32 pairs.
15082 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15083 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15084 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15085}
15086
15087/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
// NOTE(review): the signature line (15089) is not visible in this rendering;
// the body uses `N` and `DCI` (TargetLowering::DAGCombinerInfo).
15088static SDValue
15090 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15091 // At that time, we may have inserted bitcasts from integer to float.
15092 // If these bitcasts have survived DAGCombine, change the lowering of this
15093 // BUILD_VECTOR in something more vector friendly, i.e., that does not
15094 // force to use floating point types.
15095
15096 // Make sure we can change the type of the vector.
15097 // This is possible iff:
15098 // 1. The vector is only used in a bitcast to a integer type. I.e.,
15099 // 1.1. Vector is used only once.
15100 // 1.2. Use is a bit convert to an integer type.
15101 // 2. The size of its operands are 32-bits (64-bits are not legal).
15102 EVT VT = N->getValueType(0);
15103 EVT EltVT = VT.getVectorElementType();
15104
15105 // Check 1.1. and 2.
15106 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15107 return SDValue();
15108
15109 // By construction, the input type must be float.
15110 assert(EltVT == MVT::f32 && "Unexpected type!");
15111
15112 // Check 1.2.
15113 SDNode *Use = *N->user_begin();
15114 if (Use->getOpcode() != ISD::BITCAST ||
15115 Use->getValueType(0).isFloatingPoint())
15116 return SDValue();
15117
15118 // Check profitability.
15119 // Model is, if more than half of the relevant operands are bitcast from
15120 // i32, turn the build_vector into a sequence of insert_vector_elt.
15121 // Relevant operands are everything that is not statically
15122 // (i.e., at compile time) bitcasted.
15123 unsigned NumOfBitCastedElts = 0;
15124 unsigned NumElts = VT.getVectorNumElements();
15125 unsigned NumOfRelevantElts = NumElts;
15126 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15127 SDValue Elt = N->getOperand(Idx);
15128 if (Elt->getOpcode() == ISD::BITCAST) {
15129 // Assume only bit cast to i32 will go away.
15130 if (Elt->getOperand(0).getValueType() == MVT::i32)
15131 ++NumOfBitCastedElts;
15132 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15133 // Constants are statically casted, thus do not count them as
15134 // relevant operands.
15135 --NumOfRelevantElts;
15136 }
15137
15138 // Check if more than half of the elements require a non-free bitcast.
15139 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15140 return SDValue();
15141
15142 SelectionDAG &DAG = DCI.DAG;
15143 // Create the new vector type.
15144 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15145 // Check if the type is legal.
15146 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15147 if (!TLI.isTypeLegal(VecVT))
15148 return SDValue();
15149
15150 // Combine:
15151 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15152 // => BITCAST INSERT_VECTOR_ELT
15153 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15154 // (BITCAST EN), N.
15155 SDValue Vec = DAG.getUNDEF(VecVT);
15156 SDLoc dl(N);
15157 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15158 SDValue V = N->getOperand(Idx);
// Undef lanes need no insertion; leave them as undef in the result.
15159 if (V.isUndef())
15160 continue;
15161 if (V.getOpcode() == ISD::BITCAST &&
15162 V->getOperand(0).getValueType() == MVT::i32)
15163 // Fold obvious case.
15164 V = V.getOperand(0);
15165 else {
15166 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15167 // Make the DAGCombiner fold the bitcasts.
15168 DCI.AddToWorklist(V.getNode());
15169 }
15170 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15171 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15172 }
15173 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15174 // Make the DAGCombiner fold the bitcasts.
15175 DCI.AddToWorklist(Vec.getNode());
15176 return Vec;
15177}
15178
// Combine for ARMISD::PREDICATE_CAST (cast between an i32 mask value and an
// MVE predicate type). NOTE(review): the signature line (15180) is not
// visible in this rendering; the body uses `N` and `DCI`.
15179static SDValue
15181 EVT VT = N->getValueType(0);
15182 SDValue Op = N->getOperand(0);
15183 SDLoc dl(N);
15184
15185 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15186 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15187 // If the valuetypes are the same, we can remove the cast entirely.
15188 if (Op->getOperand(0).getValueType() == VT)
15189 return Op->getOperand(0);
15190 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15191 }
15192
15193 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15194 // more VPNOT which might get folded as else predicates.
15195 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15196 SDValue X =
15197 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
// 65535 = all-ones in the low 16 bits, i.e. an all-true predicate mask
// (only the bottom 16 bits of the i32 are meaningful, see below).
15198 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15199 DCI.DAG.getConstant(65535, dl, MVT::i32));
15200 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15201 }
15202
15203 // Only the bottom 16 bits of the source register are used.
15204 if (Op.getValueType() == MVT::i32) {
15205 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15206 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
// SimplifyDemandedBits may update the node in place; returning SDValue(N, 0)
// signals "changed, revisit" to the combiner.
15207 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15208 return SDValue(N, 0);
15209 }
15210 return SDValue();
15211}
15212
// Combine for ARMISD::VECTOR_REG_CAST (reinterpret a vector register without
// changing its bits). NOTE(review): the signature line (15213) is not visible
// in this rendering; the visible trailing parameter is `const ARMSubtarget
// *ST` and the body also uses `N` and `DAG`.
15214 const ARMSubtarget *ST) {
15215 EVT VT = N->getValueType(0);
15216 SDValue Op = N->getOperand(0);
15217 SDLoc dl(N);
15218
15219 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15220 if (ST->isLittle())
15221 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15222
15223 // VT VECTOR_REG_CAST (VT Op) -> Op
15224 if (Op.getValueType() == VT)
15225 return Op;
15226 // VECTOR_REG_CAST undef -> undef
15227 if (Op.isUndef())
15228 return DAG.getUNDEF(VT);
15229
15230 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15231 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15232 // If the valuetypes are the same, we can remove the cast entirely.
15233 if (Op->getOperand(0).getValueType() == VT)
15234 return Op->getOperand(0);
15235 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15236 }
15237
15238 return SDValue();
15239}
15240
// Canonicalize MVE vector compares (ARMISD::VCMP). NOTE(review): the
// signature line (15241) is not visible in this rendering; the visible
// trailing parameter is `const ARMSubtarget *Subtarget` and the body also
// uses `N` and `DAG`. Folds compares against zero into VCMPZ and swaps
// operands so a VDUP ends up on the right-hand side.
15242 const ARMSubtarget *Subtarget) {
// Only meaningful for MVE; VCMP/VCMPZ are MVE predicate-producing nodes.
15243 if (!Subtarget->hasMVEIntegerOps())
15244 return SDValue();
15245
15246 EVT VT = N->getValueType(0);
15247 SDValue Op0 = N->getOperand(0);
15248 SDValue Op1 = N->getOperand(1);
15249 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15250 SDLoc dl(N);
15251
15252 // vcmp X, 0, cc -> vcmpz X, cc
15253 if (isZeroVector(Op1))
15254 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15255
// Operand swaps require the mirrored condition code, and that code must be
// encodable for the element type (float vs integer).
15256 unsigned SwappedCond = getSwappedCondition(Cond);
15257 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15258 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15259 if (isZeroVector(Op0))
15260 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15261 DAG.getConstant(SwappedCond, dl, MVT::i32));
15262 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15263 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15264 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15265 DAG.getConstant(SwappedCond, dl, MVT::i32));
15266 }
15267
15268 return SDValue();
15269}
15270
15271/// PerformInsertEltCombine - Target-specific dag combine xforms for
15272/// ISD::INSERT_VECTOR_ELT.
// NOTE(review): the signature lines (15273-15274) are not visible in this
// rendering; the body uses `N` and `DCI`.
15275 // Bitcast an i64 load inserted into a vector to f64.
15276 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15277 EVT VT = N->getValueType(0);
15278 SDNode *Elt = N->getOperand(1).getNode();
// Only profitable when the inserted element is a plain, non-volatile i64
// load that can go straight into a VFP d-register.
15279 if (VT.getVectorElementType() != MVT::i64 ||
15280 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15281 return SDValue();
15282
15283 SelectionDAG &DAG = DCI.DAG;
15284 SDLoc dl(N);
// NOTE(review): line 15286 (the element-count argument to getVectorVT) is
// missing from this rendering.
15285 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15287 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15288 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15289 // Make the DAGCombiner fold the bitcasts.
15290 DCI.AddToWorklist(Vec.getNode());
15291 DCI.AddToWorklist(V.getNode());
15292 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15293 Vec, V, N->getOperand(2));
15294 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15295}
15296
15297// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15298// directly or bitcast to an integer if the original is a float vector.
15299// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15300// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
// NOTE(review): the signature line (15302) is not visible in this rendering;
// the body uses `N` and `DCI`.
15301static SDValue
15303 EVT VT = N->getValueType(0);
15304 SDLoc dl(N);
15305
// Requires a legal f64 (i.e. VFP) and runs only after legalization so the
// v2f64 cast below is safe.
15306 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15307 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15308 return SDValue();
15309
// Look through a f32->i32 bitcast to find the underlying extract; only
// even lanes can start a (lane, lane+1) pair.
15310 SDValue Ext = SDValue(N, 0);
15311 if (Ext.getOpcode() == ISD::BITCAST &&
15312 Ext.getOperand(0).getValueType() == MVT::f32)
15313 Ext = Ext.getOperand(0);
15314 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15315 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15316 Ext.getConstantOperandVal(1) % 2 != 0)
15317 return SDValue();
// Don't interfere with int-to-fp conversions that want the scalar extract.
15318 if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||
15319 Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))
15320 return SDValue();
15321
15322 SDValue Op0 = Ext.getOperand(0);
15323 EVT VecVT = Op0.getValueType();
15324 unsigned ResNo = Op0.getResNo();
15325 unsigned Lane = Ext.getConstantOperandVal(1);
15326 if (VecVT.getVectorNumElements() != 4)
15327 return SDValue();
15328
15329 // Find another extract, of Lane + 1
15330 auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {
15331 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15332 isa<ConstantSDNode>(V->getOperand(1)) &&
15333 V->getConstantOperandVal(1) == Lane + 1 &&
15334 V->getOperand(0).getResNo() == ResNo;
15335 });
15336 if (OtherIt == Op0->users().end())
15337 return SDValue();
15338
15339 // For float extracts, we need to be converting to a i32 for both vector
15340 // lanes.
15341 SDValue OtherExt(*OtherIt, 0);
15342 if (OtherExt.getValueType() != MVT::i32) {
15343 if (!OtherExt->hasOneUse() ||
15344 OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||
15345 OtherExt->user_begin()->getValueType(0) != MVT::i32)
15346 return SDValue();
15347 OtherExt = SDValue(*OtherExt->user_begin(), 0);
15348 }
15349
15350 // Convert the type to a f64 and extract with a VMOVRRD.
15351 SDValue F64 = DCI.DAG.getNode(
15352 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15353 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15354 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15355 SDValue VMOVRRD =
15356 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15357
// The sibling extract is rewired to the second VMOVRRD result; this node
// becomes the first.
15358 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15359 return VMOVRRD;
15360}
15361
// Combine for ISD::EXTRACT_VECTOR_ELT. NOTE(review): the signature lines
// (15362-15363) are not visible in this rendering; visible trailing parameter
// is `const ARMSubtarget *ST` and the body also uses `N` and `DCI`.
15364 const ARMSubtarget *ST) {
15365 SDValue Op0 = N->getOperand(0);
15366 EVT VT = N->getValueType(0);
15367 SDLoc dl(N);
15368
15369 // extract (vdup x) -> x
15370 if (Op0->getOpcode() == ARMISD::VDUP) {
15371 SDValue X = Op0->getOperand(0);
// Extracting one lane of a splat is the splatted scalar; insert the
// appropriate scalar move/bitcast when the types differ.
15372 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15373 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15374 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15375 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15376 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15377 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15378
// Peel off same-size bitcasts to find a value of the wanted type.
15379 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15380 X = X->getOperand(0);
15381 if (X.getValueType() == VT)
15382 return X;
15383 }
15384
15385 // extract ARM_BUILD_VECTOR -> x
15386 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15387 isa<ConstantSDNode>(N->getOperand(1)) &&
15388 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15389 return Op0.getOperand(N->getConstantOperandVal(1));
15390 }
15391
15392 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
// NOTE(review): line 15396 (part of this condition, presumably checking the
// bitcast source is a BUILD_VECTOR) is missing from this rendering.
15393 if (Op0.getValueType() == MVT::v4i32 &&
15394 isa<ConstantSDNode>(N->getOperand(1)) &&
15395 Op0.getOpcode() == ISD::BITCAST &&
15397 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15398 SDValue BV = Op0.getOperand(0);
15399 unsigned Offset = N->getConstantOperandVal(1);
15400 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
// Endianness decides which GPR of the VMOVDRR pair holds which i32 half.
15401 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15402 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15403 }
15404
15405 // extract x, n; extract x, n+1 -> VMOVRRD x
15406 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15407 return R;
15408
15409 // extract (MVETrunc(x)) -> extract x
// NOTE(review): lines 15413 and 15415 (the computations of `Vec` and
// `SubIdx` from `Idx`) are missing from this rendering.
15410 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15411 unsigned Idx = N->getConstantOperandVal(1);
15412 unsigned Vec =
15414 unsigned SubIdx =
15416 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15417 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15418 }
15419
15420 return SDValue();
15421}
15422
// Combine for ISD::SIGN_EXTEND_INREG. NOTE(review): the signature line
// (15423) is not visible in this rendering; the body uses `N` and `DAG`.
// Folds a sign-extend of an unsigned lane get into the signed variant.
15424 SDValue Op = N->getOperand(0);
15425 EVT VT = N->getValueType(0);
15426
15427 // sext_inreg(VGETLANEu) -> VGETLANEs
// Only valid when the in-reg extension width matches the lane's scalar
// type, i.e. the sext_inreg exactly undoes VGETLANEu's zero extension.
15428 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15429 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15430 Op.getOperand(0).getValueType().getScalarType())
15431 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15432 Op.getOperand(1));
15433
15434 return SDValue();
15435}
15436
15437static SDValue
15439 SDValue Vec = N->getOperand(0);
15440 SDValue SubVec = N->getOperand(1);
15441 uint64_t IdxVal = N->getConstantOperandVal(2);
15442 EVT VecVT = Vec.getValueType();
15443 EVT SubVT = SubVec.getValueType();
15444
15445 // Only do this for legal fixed vector types.
15446 if (!VecVT.isFixedLengthVector() ||
15447 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15449 return SDValue();
15450
15451 // Ignore widening patterns.
15452 if (IdxVal == 0 && Vec.isUndef())
15453 return SDValue();
15454
15455 // Subvector must be half the width and an "aligned" insertion.
15456 unsigned NumSubElts = SubVT.getVectorNumElements();
15457 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15458 (IdxVal != 0 && IdxVal != NumSubElts))
15459 return SDValue();
15460
15461 // Fold insert_subvector -> concat_vectors
15462 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15463 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15464 SDLoc DL(N);
15465 SDValue Lo, Hi;
15466 if (IdxVal == 0) {
15467 Lo = SubVec;
15468 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15469 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15470 } else {
15471 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15472 DCI.DAG.getVectorIdxConstant(0, DL));
15473 Hi = SubVec;
15474 }
15475 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15476}
15477
15478// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
// NOTE(review): the signature line (15479) is not visible in this rendering;
// the visible trailing parameter is `SelectionDAG &DAG` and the body also
// uses the shuffle node `N` (a ShuffleVectorSDNode, per the getMask() calls).
15480 SelectionDAG &DAG) {
15481 SDValue Trunc = N->getOperand(0);
15482 EVT VT = Trunc.getValueType();
// Only applies when the second shuffle input is unused (undef), so the
// shuffle is purely re-lane-ing the truncation result.
15483 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15484 return SDValue();
15485
15486 SDLoc DL(Trunc);
// The two isVMOVNTruncMask forms differ in which source supplies the even
// vs odd lanes; the operand order of VMOVN is swapped to match.
15487 if (isVMOVNTruncMask(N->getMask(), VT, false))
15488 return DAG.getNode(
15489 ARMISD::VMOVN, DL, VT,
15490 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15491 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15492 DAG.getConstant(1, DL, MVT::i32));
15493 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15494 return DAG.getNode(
15495 ARMISD::VMOVN, DL, VT,
15496 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15497 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15498 DAG.getConstant(1, DL, MVT::i32));
15499 return SDValue();
15500}
15501
15502/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15503/// ISD::VECTOR_SHUFFLE.
// NOTE(review): the signature lines (15504-15505) are not visible in this
// rendering; the body uses `N` and `DAG`, and first tries the
// shuffle(MVETrunc)->VMOVN fold (line 15506 references its result `R`).
15506 return R;
15507
15508 // The LLVM shufflevector instruction does not require the shuffle mask
15509 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15510 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15511 // operands do not match the mask length, they are extended by concatenating
15512 // them with undef vectors. That is probably the right thing for other
15513 // targets, but for NEON it is better to concatenate two double-register
15514 // size vector operands into a single quad-register size vector. Do that
15515 // transformation here:
15516 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15517 // shuffle(concat(v1, v2), undef)
15518 SDValue Op0 = N->getOperand(0);
15519 SDValue Op1 = N->getOperand(1);
15520 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15521 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15522 Op0.getNumOperands() != 2 ||
15523 Op1.getNumOperands() != 2)
15524 return SDValue();
15525 SDValue Concat0Op1 = Op0.getOperand(1);
15526 SDValue Concat1Op1 = Op1.getOperand(1);
// Both concats must pad with undef in their upper half for the rewrite to
// be a pure repacking.
15527 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15528 return SDValue();
15529 // Skip the transformation if any of the types are illegal.
15530 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15531 EVT VT = N->getValueType(0);
15532 if (!TLI.isTypeLegal(VT) ||
15533 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15534 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15535 return SDValue();
15536
15537 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15538 Op0.getOperand(0), Op1.getOperand(0));
15539 // Translate the shuffle mask.
// Old mask indices: [0, HalfElts) = v1 lanes, [NumElts, NumElts+HalfElts)
// = v2 lanes; everything else referenced an undef half and maps to -1.
// NOTE(review): line 15543 (presumably the ShuffleVectorSDNode cast giving
// `SVN`) is missing from this rendering.
15540 SmallVector<int, 16> NewMask;
15541 unsigned NumElts = VT.getVectorNumElements();
15542 unsigned HalfElts = NumElts/2;
15544 for (unsigned n = 0; n < NumElts; ++n) {
15545 int MaskElt = SVN->getMaskElt(n);
15546 int NewElt = -1;
15547 if (MaskElt < (int)HalfElts)
15548 NewElt = MaskElt;
15549 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15550 NewElt = HalfElts + MaskElt - NumElts;
15551 NewMask.push_back(NewElt);
15552 }
15553 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15554 DAG.getUNDEF(VT), NewMask);
15555}
15556
15557/// Load/store instruction that can be merged with a base address
15558/// update
// NOTE(review): struct BaseUpdateTarget — the opening line and the fields
// before AddrOpIdx (per later uses: `SDNode *N`, `bool isIntrinsic`,
// `bool isStore`) are not visible in this rendering.
// Index of the address operand within N's operand list.
15563 unsigned AddrOpIdx;
15564};
15565
// NOTE(review): struct BaseUpdateUser — the opening line (15566) is not
// visible in this rendering. Describes an ADD/OR-like user of the address
// that can be folded into a post-indexed _UPD node.
15567 /// Instruction that updates a pointer
15569 /// Pointer increment operand
15571 /// Pointer increment value if it is a constant, or 0 otherwise
15572 unsigned ConstInc;
15573};
15574
// Returns true when folding the increment `User` into the load/store `N`
// would not create a cycle in the DAG. NOTE(review): the signature line
// (15575) and the Visited/Worklist declarations (15580-15581) are not
// visible in this rendering.
15576 // Check that the add is independent of the load/store.
15577 // Otherwise, folding it would create a cycle. Search through Addr
15578 // as well, since the User may not be a direct user of Addr and
15579 // only share a base pointer.
15582 Worklist.push_back(N);
15583 Worklist.push_back(User);
// Cap the predecessor search so pathological DAGs don't blow up compile
// time; hitting the cap is treated conservatively as "not valid".
15584 const unsigned MaxSteps = 1024;
15585 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15586 SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15587 return false;
15588 return true;
15589}
15590
// Attempt to fold one base-address update (Target + User) into a single
// post-indexed NEON _UPD load/store node. Returns true and rewires all uses
// on success. NOTE(review): the signature's first line (15591, presumably
// `static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,`) is not
// visible in this rendering. When SimpleConstIncOnly is set, only increments
// exactly matching the access size are accepted (those map to the free
// post-increment addressing form).
15592 struct BaseUpdateUser &User,
15593 bool SimpleConstIncOnly,
15595 SelectionDAG &DAG = DCI.DAG;
15596 SDNode *N = Target.N;
15597 MemSDNode *MemN = cast<MemSDNode>(N);
15598 SDLoc dl(N);
15599
15600 // Find the new opcode for the updating load/store.
15601 bool isLoadOp = true;
15602 bool isLaneOp = false;
15603 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15604 // as an operand.
15605 bool hasAlignment = true;
15606 unsigned NewOpc = 0;
15607 unsigned NumVecs = 0;
// Map each NEON intrinsic / ARMISD node to its updating (_UPD) counterpart,
// recording how many vectors it moves and whether it is a lane/store op.
15608 if (Target.isIntrinsic) {
15609 unsigned IntNo = N->getConstantOperandVal(1);
15610 switch (IntNo) {
15611 default:
15612 llvm_unreachable("unexpected intrinsic for Neon base update");
15613 case Intrinsic::arm_neon_vld1:
15614 NewOpc = ARMISD::VLD1_UPD;
15615 NumVecs = 1;
15616 break;
15617 case Intrinsic::arm_neon_vld2:
15618 NewOpc = ARMISD::VLD2_UPD;
15619 NumVecs = 2;
15620 break;
15621 case Intrinsic::arm_neon_vld3:
15622 NewOpc = ARMISD::VLD3_UPD;
15623 NumVecs = 3;
15624 break;
15625 case Intrinsic::arm_neon_vld4:
15626 NewOpc = ARMISD::VLD4_UPD;
15627 NumVecs = 4;
15628 break;
15629 case Intrinsic::arm_neon_vld1x2:
15630 NewOpc = ARMISD::VLD1x2_UPD;
15631 NumVecs = 2;
15632 hasAlignment = false;
15633 break;
15634 case Intrinsic::arm_neon_vld1x3:
15635 NewOpc = ARMISD::VLD1x3_UPD;
15636 NumVecs = 3;
15637 hasAlignment = false;
15638 break;
15639 case Intrinsic::arm_neon_vld1x4:
15640 NewOpc = ARMISD::VLD1x4_UPD;
15641 NumVecs = 4;
15642 hasAlignment = false;
15643 break;
15644 case Intrinsic::arm_neon_vld2dup:
15645 NewOpc = ARMISD::VLD2DUP_UPD;
15646 NumVecs = 2;
15647 break;
15648 case Intrinsic::arm_neon_vld3dup:
15649 NewOpc = ARMISD::VLD3DUP_UPD;
15650 NumVecs = 3;
15651 break;
15652 case Intrinsic::arm_neon_vld4dup:
15653 NewOpc = ARMISD::VLD4DUP_UPD;
15654 NumVecs = 4;
15655 break;
15656 case Intrinsic::arm_neon_vld2lane:
15657 NewOpc = ARMISD::VLD2LN_UPD;
15658 NumVecs = 2;
15659 isLaneOp = true;
15660 break;
15661 case Intrinsic::arm_neon_vld3lane:
15662 NewOpc = ARMISD::VLD3LN_UPD;
15663 NumVecs = 3;
15664 isLaneOp = true;
15665 break;
15666 case Intrinsic::arm_neon_vld4lane:
15667 NewOpc = ARMISD::VLD4LN_UPD;
15668 NumVecs = 4;
15669 isLaneOp = true;
15670 break;
15671 case Intrinsic::arm_neon_vst1:
15672 NewOpc = ARMISD::VST1_UPD;
15673 NumVecs = 1;
15674 isLoadOp = false;
15675 break;
15676 case Intrinsic::arm_neon_vst2:
15677 NewOpc = ARMISD::VST2_UPD;
15678 NumVecs = 2;
15679 isLoadOp = false;
15680 break;
15681 case Intrinsic::arm_neon_vst3:
15682 NewOpc = ARMISD::VST3_UPD;
15683 NumVecs = 3;
15684 isLoadOp = false;
15685 break;
15686 case Intrinsic::arm_neon_vst4:
15687 NewOpc = ARMISD::VST4_UPD;
15688 NumVecs = 4;
15689 isLoadOp = false;
15690 break;
15691 case Intrinsic::arm_neon_vst2lane:
15692 NewOpc = ARMISD::VST2LN_UPD;
15693 NumVecs = 2;
15694 isLoadOp = false;
15695 isLaneOp = true;
15696 break;
15697 case Intrinsic::arm_neon_vst3lane:
15698 NewOpc = ARMISD::VST3LN_UPD;
15699 NumVecs = 3;
15700 isLoadOp = false;
15701 isLaneOp = true;
15702 break;
15703 case Intrinsic::arm_neon_vst4lane:
15704 NewOpc = ARMISD::VST4LN_UPD;
15705 NumVecs = 4;
15706 isLoadOp = false;
15707 isLaneOp = true;
15708 break;
15709 case Intrinsic::arm_neon_vst1x2:
15710 NewOpc = ARMISD::VST1x2_UPD;
15711 NumVecs = 2;
15712 isLoadOp = false;
15713 hasAlignment = false;
15714 break;
15715 case Intrinsic::arm_neon_vst1x3:
15716 NewOpc = ARMISD::VST1x3_UPD;
15717 NumVecs = 3;
15718 isLoadOp = false;
15719 hasAlignment = false;
15720 break;
15721 case Intrinsic::arm_neon_vst1x4:
15722 NewOpc = ARMISD::VST1x4_UPD;
15723 NumVecs = 4;
15724 isLoadOp = false;
15725 hasAlignment = false;
15726 break;
15727 }
15728 } else {
15729 isLaneOp = true;
15730 switch (N->getOpcode()) {
15731 default:
15732 llvm_unreachable("unexpected opcode for Neon base update");
15733 case ARMISD::VLD1DUP:
15734 NewOpc = ARMISD::VLD1DUP_UPD;
15735 NumVecs = 1;
15736 break;
15737 case ARMISD::VLD2DUP:
15738 NewOpc = ARMISD::VLD2DUP_UPD;
15739 NumVecs = 2;
15740 break;
15741 case ARMISD::VLD3DUP:
15742 NewOpc = ARMISD::VLD3DUP_UPD;
15743 NumVecs = 3;
15744 break;
15745 case ARMISD::VLD4DUP:
15746 NewOpc = ARMISD::VLD4DUP_UPD;
15747 NumVecs = 4;
15748 break;
15749 case ISD::LOAD:
15750 NewOpc = ARMISD::VLD1_UPD;
15751 NumVecs = 1;
15752 isLaneOp = false;
15753 break;
15754 case ISD::STORE:
15755 NewOpc = ARMISD::VST1_UPD;
15756 NumVecs = 1;
15757 isLaneOp = false;
15758 isLoadOp = false;
15759 break;
15760 }
15761 }
15762
15763 // Find the size of memory referenced by the load/store.
15764 EVT VecTy;
15765 if (isLoadOp) {
15766 VecTy = N->getValueType(0);
15767 } else if (Target.isIntrinsic) {
15768 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15769 } else {
15770 assert(Target.isStore &&
15771 "Node has to be a load, a store, or an intrinsic!");
15772 VecTy = N->getOperand(1).getValueType();
15773 }
15774
15775 bool isVLDDUPOp =
15776 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15777 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15778
// Lane and dup operations touch only one element per vector, not the
// whole vector, so scale the byte count down accordingly.
15779 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15780 if (isLaneOp || isVLDDUPOp)
15781 NumBytes /= VecTy.getVectorNumElements();
15782
15783 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15784 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15785 // separate instructions that make it harder to use a non-constant update.
15786 return false;
15787 }
15788
15789 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15790 return false;
15791
// Reject folds that would introduce a cycle in the DAG.
15792 if (!isValidBaseUpdate(N, User.N))
15793 return false;
15794
15795 // OK, we found an ADD we can fold into the base update.
15796 // Now, create a _UPD node, taking care of not breaking alignment.
15797
15798 EVT AlignedVecTy = VecTy;
15799 Align Alignment = MemN->getAlign();
15800
15801 // If this is a less-than-standard-aligned load/store, change the type to
15802 // match the standard alignment.
15803 // The alignment is overlooked when selecting _UPD variants; and it's
15804 // easier to introduce bitcasts here than fix that.
15805 // There are 3 ways to get to this base-update combine:
15806 // - intrinsics: they are assumed to be properly aligned (to the standard
15807 // alignment of the memory type), so we don't need to do anything.
15808 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15809 // intrinsics, so, likewise, there's nothing to do.
15810 // - generic load/store instructions: the alignment is specified as an
15811 // explicit operand, rather than implicitly as the standard alignment
15812 // of the memory type (like the intrisics). We need to change the
15813 // memory type to match the explicit alignment. That way, we don't
15814 // generate non-standard-aligned ARMISD::VLDx nodes.
15815 if (isa<LSBaseSDNode>(N)) {
15816 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15817 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15818 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15819 assert(!isLaneOp && "Unexpected generic load/store lane.");
15820 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15821 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15822 }
15823 // Don't set an explicit alignment on regular load/stores that we want
15824 // to transform to VLD/VST 1_UPD nodes.
15825 // This matches the behavior of regular load/stores, which only get an
15826 // explicit alignment if the MMO alignment is larger than the standard
15827 // alignment of the memory type.
15828 // Intrinsics, however, always get an explicit alignment, set to the
15829 // alignment of the MMO.
15830 Alignment = Align(1);
15831 }
15832
15833 // Create the new updating load/store node.
15834 // First, create an SDVTList for the new updating node's results.
// Results: NumResultVecs loaded vectors (loads only), then the updated
// base pointer (i32), then the chain.
15835 EVT Tys[6];
15836 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
15837 unsigned n;
15838 for (n = 0; n < NumResultVecs; ++n)
15839 Tys[n] = AlignedVecTy;
15840 Tys[n++] = MVT::i32;
15841 Tys[n] = MVT::Other;
15842 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
15843
15844 // Then, gather the new node's operands.
// NOTE(review): line 15845 (presumably the declaration of the `Ops`
// SmallVector) is missing from this rendering.
15846 Ops.push_back(N->getOperand(0)); // incoming chain
15847 Ops.push_back(N->getOperand(Target.AddrOpIdx));
15848 Ops.push_back(User.Inc);
15849
15850 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
15851 // Try to match the intrinsic's signature
15852 Ops.push_back(StN->getValue());
15853 } else {
15854 // Loads (and of course intrinsics) match the intrinsics' signature,
15855 // so just add all but the alignment operand.
15856 unsigned LastOperand =
15857 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
15858 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
15859 Ops.push_back(N->getOperand(i));
15860 }
15861
15862 // For all node types, the alignment operand is always the last one.
15863 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
15864
15865 // If this is a non-standard-aligned STORE, the penultimate operand is the
15866 // stored value. Bitcast it to the aligned type.
15867 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
15868 SDValue &StVal = Ops[Ops.size() - 2];
15869 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
15870 }
15871
15872 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
15873 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
15874 MemN->getMemOperand());
15875
15876 // Update the uses.
15877 SmallVector<SDValue, 5> NewResults;
15878 for (unsigned i = 0; i < NumResultVecs; ++i)
15879 NewResults.push_back(SDValue(UpdN.getNode(), i));
15880
15881 // If this is a non-standard-aligned LOAD, the first result is the loaded
15882 // value. Bitcast it to the expected result type.
15883 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
15884 SDValue &LdVal = NewResults[0];
15885 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
15886 }
15887
// Rewire the original load/store's users to the new node's results, and
// the add/or user to the updated base pointer (result NumResultVecs).
15888 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
15889 DCI.CombineTo(N, NewResults);
15890 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
15891
15892 return true;
15893}
15894
15895// If (opcode ptr inc) is an ADD-like instruction, return the
15896// increment value. Otherwise return 0.
15897static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
15898 SDValue Inc, const SelectionDAG &DAG) {
// NOTE(review): line 15899 (presumably `ConstantSDNode *CInc =
// dyn_cast<ConstantSDNode>(Inc);`) is missing from this rendering.
// Non-constant increments are reported as 0.
15900 if (!CInc)
15901 return 0;
15902
15903 switch (Opcode) {
// VLD1_UPD already carries its constant post-increment; treat it like ADD.
15904 case ARMISD::VLD1_UPD:
15905 case ISD::ADD:
15906 return CInc->getZExtValue();
15907 case ISD::OR: {
15908 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
15909 // (OR ptr inc) is the same as (ADD ptr inc)
15910 return CInc->getZExtValue();
15911 }
15912 return 0;
15913 }
15914 default:
15915 return 0;
15916 }
15917}
15918
// If N is itself a pointer-plus-constant node (ADD/OR/VLD1_UPD with a
// constant increment), extract its base pointer and constant increment
// operands and return true. NOTE(review): the signature line (15919,
// presumably `static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr,
// SDValue *CInc)`) is not visible in this rendering.
15920 switch (N->getOpcode()) {
15921 case ISD::ADD:
15922 case ISD::OR: {
15923 if (isa<ConstantSDNode>(N->getOperand(1))) {
15924 *Ptr = N->getOperand(0);
15925 *CInc = N->getOperand(1);
15926 return true;
15927 }
15928 return false;
15929 }
// For VLD1_UPD the base pointer is operand 1 and the increment operand 2
// (operand 0 is the chain).
15930 case ARMISD::VLD1_UPD: {
15931 if (isa<ConstantSDNode>(N->getOperand(2))) {
15932 *Ptr = N->getOperand(1);
15933 *CInc = N->getOperand(2);
15934 return true;
15935 }
15936 return false;
15937 }
15938 default:
15939 return false;
15940 }
15941}
15942
15943/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
15944/// NEON load/store intrinsics, and generic vector load/stores, to merge
15945/// base address updates.
15946/// For generic load/stores, the memory type is assumed to be a vector.
15947/// The caller is assumed to have checked legality.
// NOTE(review): the function signature (original lines 15948-15949) is elided
// in this listing; N and DCI are its parameters.
15950 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
15951 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
15952 const bool isStore = N->getOpcode() == ISD::STORE;
// Intrinsics and stores carry the address at operand 2 (chain, value/id,
// addr); plain loads carry it at operand 1.
15953 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
15954 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
15955
15956 // Limit the number of possible base-updates we look at to prevent degenerate
15957 // cases.
15958 unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
15959
15960 SDValue Addr = N->getOperand(AddrOpIdx);
15961
// NOTE(review): the declaration of the BaseUpdates vector (original line
// 15962) is elided in this listing.
15963
15964 // Search for a use of the address operand that is an increment.
15965 for (SDUse &Use : Addr->uses()) {
15966 SDNode *User = Use.getUser();
15967 if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)
15968 continue;
15969
// The increment is whichever of the user's two operands is not Addr.
15970 SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);
15971 unsigned ConstInc =
15972 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
15973
// Record constant increments, and also register-increment ADDs (ConstInc==0).
15974 if (ConstInc || User->getOpcode() == ISD::ADD) {
15975 BaseUpdates.push_back({User, Inc, ConstInc});
15976 if (BaseUpdates.size() >= MaxBaseUpdates)
15977 break;
15978 }
15979 }
15980
15981 // If the address is a constant pointer increment itself, find
15982 // another constant increment that has the same base operand
15983 SDValue Base;
15984 SDValue CInc;
15985 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
15986 unsigned Offset =
15987 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
15988 for (SDUse &Use : Base->uses()) {
15989
15990 SDNode *User = Use.getUser();
15991 if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||
15992 User->getNumOperands() != 2)
15993 continue;
15994
15995 SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);
15996 unsigned UserOffset =
15997 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
15998
// Only consider increments strictly beyond Addr's own offset; the
// difference becomes the post-inc amount relative to Addr.
15999 if (!UserOffset || UserOffset <= Offset)
16000 continue;
16001
16002 unsigned NewConstInc = UserOffset - Offset;
16003 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16004 BaseUpdates.push_back({User, NewInc, NewConstInc});
16005 if (BaseUpdates.size() >= MaxBaseUpdates)
16006 break;
16007 }
16008 }
16009
16010 // Try to fold the load/store with an update that matches memory
16011 // access size. This should work well for sequential loads.
16012 unsigned NumValidUpd = BaseUpdates.size();
16013 for (unsigned I = 0; I < NumValidUpd; I++) {
16014 BaseUpdateUser &User = BaseUpdates[I];
// TryCombineBaseUpdate rewrites through DCI.CombineTo on success, so a true
// return means N has already been replaced; SDValue() is returned either way.
16015 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16016 return SDValue();
16017 }
16018
16019 // Try to fold with other users. Non-constant updates are considered
16020 // first, and constant updates are sorted to not break a sequence of
16021 // strided accesses (if there is any).
16022 llvm::stable_sort(BaseUpdates,
16023 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16024 return LHS.ConstInc < RHS.ConstInc;
16025 });
16026 for (BaseUpdateUser &User : BaseUpdates) {
16027 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16028 return SDValue();
16029 }
16030 return SDValue();
16031}
16032
// Wrapper that runs the base-update combine only after legalization and not
// re-entrantly from the legalizer. NOTE(review): the signature (original
// lines 16033-16034) is elided in this listing — presumably PerformVLDCombine.
16035 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16036 return SDValue();
16037
16038 return CombineBaseUpdate(N, DCI);
16039}
16040
// Combine MVE vld2q/vld4q/vst2q/vst4q intrinsics with a following pointer ADD
// into the post-incrementing VLDn_UPD/VSTn_UPD nodes. NOTE(review): the
// signature (original lines 16041-16042) is elided in this listing.
16043 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16044 return SDValue();
16045
16046 SelectionDAG &DAG = DCI.DAG;
16047 SDValue Addr = N->getOperand(2);
16048 MemSDNode *MemN = cast<MemSDNode>(N);
16049 SDLoc dl(N);
16050
16051 // For the stores, where there are multiple intrinsics we only actually want
16052 // to post-inc the last of them.
16053 unsigned IntNo = N->getConstantOperandVal(1);
16054 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16055 return SDValue();
16056 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16057 return SDValue();
16058
16059 // Search for a use of the address operand that is an increment.
16060 for (SDUse &Use : Addr->uses()) {
16061 SDNode *User = Use.getUser();
16062 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
16063 continue;
16064
16065 // Check that the add is independent of the load/store. Otherwise, folding
16066 // it would create a cycle. We can avoid searching through Addr as it's a
16067 // predecessor to both.
// NOTE(review): the Visited/Worklist declarations (original lines
// 16068-16069) are elided in this listing.
16070 Visited.insert(Addr.getNode());
16071 Worklist.push_back(N);
16072 Worklist.push_back(User);
16073 const unsigned MaxSteps = 1024;
16074 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16075 SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
16076 continue;
16077
16078 // Find the new opcode for the updating load/store.
16079 bool isLoadOp = true;
16080 unsigned NewOpc = 0;
16081 unsigned NumVecs = 0;
16082 switch (IntNo) {
16083 default:
16084 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16085 case Intrinsic::arm_mve_vld2q:
16086 NewOpc = ARMISD::VLD2_UPD;
16087 NumVecs = 2;
16088 break;
16089 case Intrinsic::arm_mve_vld4q:
16090 NewOpc = ARMISD::VLD4_UPD;
16091 NumVecs = 4;
16092 break;
16093 case Intrinsic::arm_mve_vst2q:
16094 NewOpc = ARMISD::VST2_UPD;
16095 NumVecs = 2;
16096 isLoadOp = false;
16097 break;
16098 case Intrinsic::arm_mve_vst4q:
16099 NewOpc = ARMISD::VST4_UPD;
16100 NumVecs = 4;
16101 isLoadOp = false;
16102 break;
16103 }
16104
16105 // Find the size of memory referenced by the load/store.
16106 EVT VecTy;
16107 if (isLoadOp) {
16108 VecTy = N->getValueType(0);
16109 } else {
16110 VecTy = N->getOperand(3).getValueType();
16111 }
16112
16113 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16114
16115 // If the increment is a constant, it must match the memory ref size.
16116 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
// NOTE(review): the CInc declaration (original line 16117) is elided in this
// listing; presumably a dyn_cast of Inc to ConstantSDNode.
16118 if (!CInc || CInc->getZExtValue() != NumBytes)
16119 continue;
16120
16121 // Create the new updating load/store node.
16122 // First, create an SDVTList for the new updating node's results.
16123 EVT Tys[6];
16124 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16125 unsigned n;
// Result list: NumResultVecs vectors, then i32 write-back, then the chain.
16126 for (n = 0; n < NumResultVecs; ++n)
16127 Tys[n] = VecTy;
16128 Tys[n++] = MVT::i32;
16129 Tys[n] = MVT::Other;
16130 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16131
16132 // Then, gather the new node's operands.
// NOTE(review): the Ops declaration (original line 16133) is elided in this
// listing.
16134 Ops.push_back(N->getOperand(0)); // incoming chain
16135 Ops.push_back(N->getOperand(2)); // ptr
16136 Ops.push_back(Inc);
16137
16138 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16139 Ops.push_back(N->getOperand(i));
16140
16141 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16142 MemN->getMemOperand());
16143
16144 // Update the uses.
16145 SmallVector<SDValue, 5> NewResults;
16146 for (unsigned i = 0; i < NumResultVecs; ++i)
16147 NewResults.push_back(SDValue(UpdN.getNode(), i));
16148
16149 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
// Replace the intrinsic with the new results and the ADD with the
// write-back value.
16150 DCI.CombineTo(N, NewResults);
16151 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16152
16153 break;
16154 }
16155
16156 return SDValue();
16157}
16158
16159/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16160/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16161/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16162/// return true.
// NOTE(review): the signature (original line 16163) is elided in this
// listing; N and DCI are its parameters.
16164 SelectionDAG &DAG = DCI.DAG;
16165 EVT VT = N->getValueType(0);
16166 // vldN-dup instructions only support 64-bit vectors for N > 1.
16167 if (!VT.is64BitVector())
16168 return false;
16169
16170 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16171 SDNode *VLD = N->getOperand(0).getNode();
16172 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16173 return false;
16174 unsigned NumVecs = 0;
16175 unsigned NewOpc = 0;
16176 unsigned IntNo = VLD->getConstantOperandVal(1);
16177 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16178 NumVecs = 2;
16179 NewOpc = ARMISD::VLD2DUP;
16180 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16181 NumVecs = 3;
16182 NewOpc = ARMISD::VLD3DUP;
16183 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16184 NumVecs = 4;
16185 NewOpc = ARMISD::VLD4DUP;
16186 } else {
16187 return false;
16188 }
16189
16190 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16191 // numbers match the load.
16192 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16193 for (SDUse &Use : VLD->uses()) {
16194 // Ignore uses of the chain result.
16195 if (Use.getResNo() == NumVecs)
16196 continue;
16197 SDNode *User = Use.getUser();
16198 if (User->getOpcode() != ARMISD::VDUPLANE ||
16199 VLDLaneNo != User->getConstantOperandVal(1))
16200 return false;
16201 }
16202
16203 // Create the vldN-dup node.
16204 EVT Tys[5];
16205 unsigned n;
// Result list: NumVecs vectors followed by the chain.
16206 for (n = 0; n < NumVecs; ++n)
16207 Tys[n] = VT;
16208 Tys[n] = MVT::Other;
16209 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16210 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
// NOTE(review): the VLDMemInt declaration (original line 16211) is elided in
// this listing; presumably a cast of VLD to MemIntrinsicSDNode.
16212 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16213 Ops, VLDMemInt->getMemoryVT(),
16214 VLDMemInt->getMemOperand());
16215
16216 // Update the uses.
16217 for (SDUse &Use : VLD->uses()) {
16218 unsigned ResNo = Use.getResNo();
16219 // Ignore uses of the chain result.
16220 if (ResNo == NumVecs)
16221 continue;
// Each VDUPLANE user is replaced directly by the matching dup result.
16222 DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));
16223 }
16224
16225 // Now the vldN-lane intrinsic is dead except for its chain result.
16226 // Update uses of the chain.
16227 std::vector<SDValue> VLDDupResults;
16228 for (unsigned n = 0; n < NumVecs; ++n)
16229 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16230 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16231 DCI.CombineTo(VLD, VLDDupResults);
16232
16233 return true;
16234}
16235
16236/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16237/// ARMISD::VDUPLANE.
// NOTE(review): the first signature lines (original 16238-16239) are elided
// in this listing; N, DCI and Subtarget are the parameters.
16240 const ARMSubtarget *Subtarget) {
16241 SDValue Op = N->getOperand(0);
16242 EVT VT = N->getValueType(0);
16243
16244 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16245 if (Subtarget->hasMVEIntegerOps()) {
16246 EVT ExtractVT = VT.getVectorElementType();
16247 // We need to ensure we are creating a legal type.
16248 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16249 ExtractVT = MVT::i32;
16250 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16251 N->getOperand(0), N->getOperand(1));
16252 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16253 }
16254
16255 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16256 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16257 if (CombineVLDDUP(N, DCI))
16258 return SDValue(N, 0);
16259
16260 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16261 // redundant. Ignore bit_converts for now; element sizes are checked below.
16262 while (Op.getOpcode() == ISD::BITCAST)
16263 Op = Op.getOperand(0);
16264 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16265 return SDValue();
16266
16267 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16268 unsigned EltSize = Op.getScalarValueSizeInBits();
16269 // The canonical VMOV for a zero vector uses a 32-bit element size.
16270 unsigned Imm = Op.getConstantOperandVal(0);
16271 unsigned EltBits;
16272 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16273 EltSize = 8;
16274 if (EltSize > VT.getScalarSizeInBits())
16275 return SDValue();
16276
// Same bits, just reinterpreted: the splat already covers every lane.
16277 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16278}
16279
16280/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
// NOTE(review): the first signature line (original 16281) is elided in this
// listing; N, DAG and Subtarget are the parameters.
16282 const ARMSubtarget *Subtarget) {
16283 SDValue Op = N->getOperand(0);
16284 SDLoc dl(N);
16285
16286 if (Subtarget->hasMVEIntegerOps()) {
16287 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16288 // need to come from a GPR.
16289 if (Op.getValueType() == MVT::f32)
16290 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16291 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16292 else if (Op.getValueType() == MVT::f16)
16293 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16294 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16295 }
16296
16297 if (!Subtarget->hasNEON())
16298 return SDValue();
16299
16300 // Match VDUP(LOAD) -> VLD1DUP.
16301 // We match this pattern here rather than waiting for isel because the
16302 // transform is only legal for unindexed loads.
16303 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16304 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16305 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16306 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16307 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16308 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
// NOTE(review): the getMemIntrinsicNode call line (original 16310, creating
// the ARMISD::VLD1DUP node) is elided in this listing.
16309 SDValue VLDDup =
16311 LD->getMemoryVT(), LD->getMemOperand());
// Re-route the old load's chain users to the new node's chain result.
16312 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16313 return VLDDup;
16314 }
16315
16316 return SDValue();
16317}
16318
// Load combine: fold a legal normal vector load into a post-incrementing
// VLD1_UPD via CombineBaseUpdate. NOTE(review): the first signature lines
// (original 16319-16320) are elided in this listing.
16321 const ARMSubtarget *Subtarget) {
16322 EVT VT = N->getValueType(0);
16323
16324 // If this is a legal vector load, try to combine it into a VLD1_UPD.
// NOTE(review): part of this condition (original line 16326, presumably a
// type-legality check on VT) is elided in this listing.
16325 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16327 return CombineBaseUpdate(N, DCI);
16328
16329 return SDValue();
16330}
16331
16332// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16333// pack all of the elements in one place. Next, store to memory in fewer
16334// chunks.
// NOTE(review): the first signature line (original 16335) is elided in this
// listing; St and DAG are the parameters.
16336 SelectionDAG &DAG) {
16337 SDValue StVal = St->getValue();
16338 EVT VT = StVal.getValueType();
16339 if (!St->isTruncatingStore() || !VT.isVector())
16340 return SDValue();
16341 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16342 EVT StVT = St->getMemoryVT();
16343 unsigned NumElems = VT.getVectorNumElements();
16344 assert(StVT != VT && "Cannot truncate to the same type");
16345 unsigned FromEltSz = VT.getScalarSizeInBits();
16346 unsigned ToEltSz = StVT.getScalarSizeInBits();
16347
16348 // From, To sizes and ElemCount must be pow of two
16349 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16350 return SDValue();
16351
16352 // We are going to use the original vector elt for storing.
16353 // Accumulated smaller vector elements must be a multiple of the store size.
16354 if (0 != (NumElems * FromEltSz) % ToEltSz)
16355 return SDValue();
16356
16357 unsigned SizeRatio = FromEltSz / ToEltSz;
16358 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16359
16360 // Create a type on which we perform the shuffle.
16361 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16362 NumElems * SizeRatio);
16363 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16364
16365 SDLoc DL(St);
16366 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16367 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
// Pick the sub-element of each original lane that holds the truncated value
// (the last sub-element on big-endian, the first on little-endian).
16368 for (unsigned i = 0; i < NumElems; ++i)
16369 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16370 : i * SizeRatio;
16371
16372 // Can't shuffle using an illegal type.
16373 if (!TLI.isTypeLegal(WideVecVT))
16374 return SDValue();
16375
16376 SDValue Shuff = DAG.getVectorShuffle(
16377 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16378 // At this point all of the data is stored at the bottom of the
16379 // register. We now need to save it to mem.
16380
16381 // Find the largest store unit
16382 MVT StoreType = MVT::i8;
16383 for (MVT Tp : MVT::integer_valuetypes()) {
16384 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16385 StoreType = Tp;
16386 }
16387 // Didn't find a legal store type.
16388 if (!TLI.isTypeLegal(StoreType))
16389 return SDValue();
16390
16391 // Bitcast the original vector into a vector of store-size units
16392 EVT StoreVecVT =
16393 EVT::getVectorVT(*DAG.getContext(), StoreType,
16394 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16395 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16396 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
// NOTE(review): the Chains declaration (original line 16397) is elided in
// this listing.
16398 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16399 TLI.getPointerTy(DAG.getDataLayout()));
16400 SDValue BasePtr = St->getBasePtr();
16401
16402 // Perform one or more big stores into memory.
16403 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16404 for (unsigned I = 0; I < E; I++) {
16405 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16406 ShuffWide, DAG.getIntPtrConstant(I, DL));
16407 SDValue Ch =
16408 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16409 St->getAlign(), St->getMemOperand()->getFlags());
16410 BasePtr =
16411 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16412 Chains.push_back(Ch);
16413 }
// Tie the individual store chains back together for the caller.
16414 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16415}
16416
16417// Try taking a single vector store from an fpround (which would otherwise turn
16418// into an expensive buildvector) and splitting it into a series of narrowing
16419// stores.
// NOTE(review): the first signature line (original 16420) is elided in this
// listing; St and DAG are the parameters.
16421 SelectionDAG &DAG) {
16422 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16423 return SDValue();
16424 SDValue Trunc = St->getValue();
16425 if (Trunc->getOpcode() != ISD::FP_ROUND)
16426 return SDValue();
16427 EVT FromVT = Trunc->getOperand(0).getValueType();
16428 EVT ToVT = Trunc.getValueType();
16429 if (!ToVT.isVector())
16430 return SDValue();
// NOTE(review): original line 16431 is elided in this listing (presumably an
// element-count consistency assert between FromVT and ToVT).
16432 EVT ToEltVT = ToVT.getVectorElementType();
16433 EVT FromEltVT = FromVT.getVectorElementType();
16434
// Only the f32 -> f16 narrowing case is handled here.
16435 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16436 return SDValue();
16437
16438 unsigned NumElements = 4;
16439 if (FromVT.getVectorNumElements() % NumElements != 0)
16440 return SDValue();
16441
16442 // Test if the Trunc will be convertable to a VMOVN with a shuffle, and if so
16443 // use the VMOVN over splitting the store. We are looking for patterns of:
16444 // !rev: 0 N 1 N+1 2 N+2 ...
16445 // rev: N 0 N+1 1 N+2 2 ...
16446 // The shuffle may either be a single source (in which case N = NumElts/2) or
16447 // two inputs extended with concat to the same size (in which case N =
16448 // NumElts).
16449 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16450 ArrayRef<int> M = SVN->getMask();
16451 unsigned NumElts = ToVT.getVectorNumElements();
16452 if (SVN->getOperand(1).isUndef())
16453 NumElts /= 2;
16454
16455 unsigned Off0 = Rev ? NumElts : 0;
16456 unsigned Off1 = Rev ? 0 : NumElts;
16457
16458 for (unsigned I = 0; I < NumElts; I += 2) {
16459 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16460 return false;
16461 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16462 return false;
16463 }
16464
16465 return true;
16466 };
16467
16468 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16469 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16470 return SDValue();
16471
16472 LLVMContext &C = *DAG.getContext();
16473 SDLoc DL(St);
16474 // Details about the old store
16475 SDValue Ch = St->getChain();
16476 SDValue BasePtr = St->getBasePtr();
16477 Align Alignment = St->getBaseAlign();
// NOTE(review): the MMOFlags declaration (original line 16478) is elided in
// this listing.
16479 AAMDNodes AAInfo = St->getAAInfo();
16480
16481 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16482 // and then stored as truncating integer stores.
16483 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16484 EVT NewToVT = EVT::getVectorVT(
16485 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16486
// NOTE(review): the Stores declaration (original line 16487) is elided in
// this listing.
16488 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16489 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16490 SDValue NewPtr =
16491 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16492
16493 SDValue Extract =
16494 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16495 DAG.getConstant(i * NumElements, DL, MVT::i32));
16496
// VCVTN narrows the f32 slice to f16 lanes; the result is then reinterpreted
// as integers so it can be emitted as a truncating integer store.
16497 SDValue FPTrunc =
16498 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16499 Extract, DAG.getConstant(0, DL, MVT::i32));
16500 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16501
16502 SDValue Store = DAG.getTruncStore(
16503 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16504 NewToVT, Alignment, MMOFlags, AAInfo);
16505 Stores.push_back(Store);
16506 }
16507 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16508}
16509
16510// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16511// into an expensive buildvector) and splitting it into a series of narrowing
16512// stores.
// NOTE(review): the first signature line (original 16513) is elided in this
// listing; St and DAG are the parameters.
16514 SelectionDAG &DAG) {
16515 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16516 return SDValue();
16517 SDValue Trunc = St->getValue();
16518 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16519 return SDValue();
16520 EVT FromVT = Trunc->getOperand(0).getValueType();
16521 EVT ToVT = Trunc.getValueType();
16522
16523 LLVMContext &C = *DAG.getContext();
16524 SDLoc DL(St);
16525 // Details about the old store
16526 SDValue Ch = St->getChain();
16527 SDValue BasePtr = St->getBasePtr();
16528 Align Alignment = St->getBaseAlign();
// NOTE(review): the MMOFlags declaration (original line 16529) is elided in
// this listing.
16530 AAMDNodes AAInfo = St->getAAInfo();
16531
16532 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16533 FromVT.getVectorNumElements());
16534
// NOTE(review): the Stores declaration (original line 16535) is elided in
// this listing.
// Emit one truncating store per MVETRUNC input, at increasing offsets.
16536 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16537 unsigned NewOffset =
16538 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16539 SDValue NewPtr =
16540 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16541
16542 SDValue Extract = Trunc.getOperand(i);
16543 SDValue Store = DAG.getTruncStore(
16544 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16545 NewToVT, Alignment, MMOFlags, AAInfo);
16546 Stores.push_back(Store);
16547 }
16548 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16549}
16550
16551// Given a floating point store from an extracted vector, with an integer
16552// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16553// help reduce fp register pressure, doesn't require the fp extract and allows
16554// use of more integer post-inc stores not available with vstr.
// NOTE(review): the signature line (original 16555) is elided in this
// listing; St and DAG are the parameters.
16556 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16557 return SDValue();
16558 SDValue Extract = St->getValue();
16559 EVT VT = Extract.getValueType();
16560 // For now only uses f16. This may be useful for f32 too, but that will
16561 // be bitcast(extract), not the VGETLANEu we currently check here.
16562 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16563 return SDValue();
16564
// Only fires if an equivalent integer VGETLANEu node already exists in the
// DAG — this combine never creates one.
16565 SDNode *GetLane =
16566 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16567 {Extract.getOperand(0), Extract.getOperand(1)});
16568 if (!GetLane)
16569 return SDValue();
16570
16571 LLVMContext &C = *DAG.getContext();
16572 SDLoc DL(St);
16573 // Create a new integer store to replace the existing floating point version.
16574 SDValue Ch = St->getChain();
16575 SDValue BasePtr = St->getBasePtr();
16576 Align Alignment = St->getBaseAlign();
// NOTE(review): the MMOFlags declaration (original line 16577) is elided in
// this listing.
16578 AAMDNodes AAInfo = St->getAAInfo();
16579 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16580 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16581 St->getPointerInfo(), NewToVT, Alignment,
16582 MMOFlags, AAInfo);
16583
16584 return Store;
16585}
16586
16587/// PerformSTORECombine - Target-specific dag combine xforms for
16588/// ISD::STORE.
// NOTE(review): the signature lines (original 16589-16590) and the St
// declaration (original line 16592, presumably a cast of N to StoreSDNode)
// are elided in this listing.
16593 if (St->isVolatile())
16594 return SDValue();
16595 SDValue StVal = St->getValue();
16596 EVT VT = StVal.getValueType();
16597
16598 if (Subtarget->hasNEON())
16599 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16600 return Store;
16601
16602 if (Subtarget->hasMVEFloatOps())
16603 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16604 return NewToken;
16605
16606 if (Subtarget->hasMVEIntegerOps()) {
16607 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16608 return NewChain;
16609 if (SDValue NewToken =
16611 return NewToken;
16612 }
16613
16614 if (!ISD::isNormalStore(St))
16615 return SDValue();
16616
16617 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16618 // ARM stores of arguments in the same cache line.
16619 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16620 StVal.getNode()->hasOneUse()) {
16621 SelectionDAG &DAG = DCI.DAG;
16622 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16623 SDLoc DL(St);
16624 SDValue BasePtr = St->getBasePtr();
// Store the two GPR halves of the VMOVDRR separately; operand order depends
// on endianness so the memory image stays correct.
16625 SDValue NewST1 = DAG.getStore(
16626 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16627 BasePtr, St->getPointerInfo(), St->getBaseAlign(),
16628 St->getMemOperand()->getFlags());
16629
16630 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16631 DAG.getConstant(4, DL, MVT::i32));
16632 return DAG.getStore(NewST1.getValue(0), DL,
16633 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16634 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16635 St->getBaseAlign(), St->getMemOperand()->getFlags());
16636 }
16637
// NOTE(review): part of this condition (original line 16639, presumably an
// EXTRACT_VECTOR_ELT check on StVal) is elided in this listing.
16638 if (StVal.getValueType() == MVT::i64 &&
16640
16641 // Bitcast an i64 store extracted from a vector to f64.
16642 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16643 SelectionDAG &DAG = DCI.DAG;
16644 SDLoc dl(StVal);
16645 SDValue IntVec = StVal.getOperand(0);
// NOTE(review): the element-count argument line (original 16647) is elided
// in this listing.
16646 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16648 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16649 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16650 Vec, StVal.getOperand(1));
16651 dl = SDLoc(N);
16652 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16653 // Make the DAGCombiner fold the bitcasts.
16654 DCI.AddToWorklist(Vec.getNode());
16655 DCI.AddToWorklist(ExtElt.getNode());
16656 DCI.AddToWorklist(V.getNode());
16657 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16658 St->getPointerInfo(), St->getAlign(),
16659 St->getMemOperand()->getFlags(), St->getAAInfo());
16660 }
16661
16662 // If this is a legal vector store, try to combine it into a VST1_UPD.
// NOTE(review): part of this condition (original line 16664, presumably a
// type-legality check on VT) is elided in this listing.
16663 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16665 return CombineBaseUpdate(N, DCI);
16666
16667 return SDValue();
16668}
16669
16670/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16671/// can replace combinations of VMUL and VCVT (floating-point to integer)
16672/// when the VMUL has a constant operand that is a power of 2.
16673///
16674/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16675/// vmul.f32 d16, d17, d16
16676/// vcvt.s32.f32 d16, d16
16677/// becomes:
16678/// vcvt.s32.f32 d16, d16, #3
// NOTE(review): the first signature line (original 16679) is elided in this
// listing; N, DAG and Subtarget are the parameters.
16680 const ARMSubtarget *Subtarget) {
16681 if (!Subtarget->hasNEON())
16682 return SDValue();
16683
16684 SDValue Op = N->getOperand(0);
16685 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16686 Op.getOpcode() != ISD::FMUL)
16687 return SDValue();
16688
16689 SDValue ConstVec = Op->getOperand(1);
16690 if (!isa<BuildVectorSDNode>(ConstVec))
16691 return SDValue();
16692
16693 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16694 uint32_t FloatBits = FloatTy.getSizeInBits();
16695 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16696 uint32_t IntBits = IntTy.getSizeInBits();
16697 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16698 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16699 // These instructions only exist converting from f32 to i32. We can handle
16700 // smaller integers by generating an extra truncate, but larger ones would
16701 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16702 // these instructions only support v2i32/v4i32 types.
16703 return SDValue();
16704 }
16705
16706 BitVector UndefElements;
// NOTE(review): the BV declaration (original line 16707) is elided in this
// listing; presumably a cast of ConstVec to BuildVectorSDNode.
// The fixed-point #imm field holds 1..32, hence the 33-bit budget and the
// C > 32 rejection below.
16708 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16709 if (C == -1 || C == 0 || C > 32)
16710 return SDValue();
16711
16712 SDLoc dl(N);
16713 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16714 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16715 Intrinsic::arm_neon_vcvtfp2fxu;
16716 SDValue FixConv = DAG.getNode(
16717 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16718 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16719 DAG.getConstant(C, dl, MVT::i32));
16720
16721 if (IntBits < FloatBits)
16722 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16723
16724 return FixConv;
16725}
16726
// NOTE(review): the first signature line (original 16727) is elided in this
// listing — presumably PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
// const ARMSubtarget *Subtarget), given the call site in PerformFADDCombine.
16728 const ARMSubtarget *Subtarget) {
16729 if (!Subtarget->hasMVEFloatOps())
16730 return SDValue();
16731
16732 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16733 // The second form can be more easily turned into a predicated vadd, and
16734 // possibly combined into a fma to become a predicated vfma.
16735 SDValue Op0 = N->getOperand(0);
16736 SDValue Op1 = N->getOperand(1);
16737 EVT VT = N->getValueType(0);
16738 SDLoc DL(N);
16739
16740 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16741 // which these VMOV's represent.
16742 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16743 if (Op.getOpcode() != ISD::BITCAST ||
16744 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16745 return false;
16746 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
// 1664 / 2688 are the VMOV modified-immediate encodings used here for the
// -0.0 splat in v4f32 / v8f16 respectively; 0 is +0.0, valid only under nsz.
16747 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16748 return true;
16749 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16750 return true;
16751 return false;
16752 };
16753
// Canonicalise so the vselect (if any) is Op1; fadd is commutative.
16754 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16755 std::swap(Op0, Op1);
16756
16757 if (Op1.getOpcode() != ISD::VSELECT)
16758 return SDValue();
16759
16760 SDNodeFlags FaddFlags = N->getFlags();
16761 bool NSZ = FaddFlags.hasNoSignedZeros();
16762 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16763 return SDValue();
16764
16765 SDValue FAdd =
16766 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16767 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16768}
16769
// Reassociate a scalar/vector add into an MVE VCMLA accumulator.
// NOTE(review): the signature line (original 16770) is elided in this listing
// — presumably PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG), given
// the call site in PerformFADDCombine.
16771 SDValue LHS = N->getOperand(0);
16772 SDValue RHS = N->getOperand(1);
16773 EVT VT = N->getValueType(0);
16774 SDLoc DL(N);
16775
// Reassociation is only legal when the fadd carries the reassoc flag.
16776 if (!N->getFlags().hasAllowReassociation())
16777 return SDValue();
16778
16779 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
16780 auto ReassocComplex = [&](SDValue A, SDValue B) {
16781 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
16782 return SDValue();
16783 unsigned Opc = A.getConstantOperandVal(0);
16784 if (Opc != Intrinsic::arm_mve_vcmlaq)
16785 return SDValue();
// Rebuild the vcmla with B folded into its accumulator operand (operand 2);
// the rotation (operand 1) and multiplicands (operands 3, 4) are unchanged.
16786 SDValue VCMLA = DAG.getNode(
16787 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
16788 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
16789 A.getOperand(3), A.getOperand(4));
16790 VCMLA->setFlags(A->getFlags());
16791 return VCMLA;
16792 };
// Try both operand orders, as the vcmla may be on either side of the fadd.
16793 if (SDValue R = ReassocComplex(LHS, RHS))
16794 return R;
16795 if (SDValue R = ReassocComplex(RHS, LHS))
16796 return R;
16797
16798 return SDValue();
16799}
16800
// Top-level ISD::FADD combine: tries the vselect-identity fold first, then
// the VCMLA accumulator reassociation. NOTE(review): the first signature line
// (original 16801) is elided in this listing — presumably PerformFADDCombine.
16802 const ARMSubtarget *Subtarget) {
16803 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
16804 return S;
16805 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
16806 return S;
16807 return SDValue();
16808}
16809
16810/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
16811/// can replace combinations of VCVT (integer to floating-point) and VMUL
16812/// when the VMUL has a constant operand that is a power of 2.
16813///
16814/// Example (assume d17 = <float 0.125, float 0.125>):
16815/// vcvt.f32.s32 d16, d16
16816/// vmul.f32 d16, d16, d17
16817/// becomes:
16818/// vcvt.f32.s32 d16, d16, #3
16820 const ARMSubtarget *Subtarget) {
16821 if (!Subtarget->hasNEON())
16822 return SDValue();
16823
16824 SDValue Op = N->getOperand(0);
16825 unsigned OpOpcode = Op.getNode()->getOpcode();
16826 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
16827 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
16828 return SDValue();
16829
16830 SDValue ConstVec = N->getOperand(1);
16831 if (!isa<BuildVectorSDNode>(ConstVec))
16832 return SDValue();
16833
16834 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
16835 uint32_t FloatBits = FloatTy.getSizeInBits();
16836 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
16837 uint32_t IntBits = IntTy.getSizeInBits();
16838 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16839 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16840 // These instructions only exist converting from i32 to f32. We can handle
16841 // smaller integers by generating an extra extend, but larger ones would
16842 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16843 // these intructions only support v2i32/v4i32 types.
16844 return SDValue();
16845 }
16846
16847 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
16848 APFloat Recip(0.0f);
16849 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
16850 return SDValue();
16851
16852 bool IsExact;
16853 APSInt IntVal(33);
16854 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
16855 APFloat::opOK ||
16856 !IsExact)
16857 return SDValue();
16858
16859 int32_t C = IntVal.exactLogBase2();
16860 if (C == -1 || C == 0 || C > 32)
16861 return SDValue();
16862
16863 SDLoc DL(N);
16864 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
16865 SDValue ConvInput = Op.getOperand(0);
16866 if (IntBits < FloatBits)
16868 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
16869
16870 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
16871 : Intrinsic::arm_neon_vcvtfxu2fp;
16872 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
16873 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
16874 DAG.getConstant(C, DL, MVT::i32));
16875}
16876
16878 const ARMSubtarget *ST) {
16879 if (!ST->hasMVEIntegerOps())
16880 return SDValue();
16881
16882 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
16883 EVT ResVT = N->getValueType(0);
16884 SDValue N0 = N->getOperand(0);
16885 SDLoc dl(N);
16886
16887 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
16888 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
16889 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
16890 N0.getValueType() == MVT::v16i8)) {
16891 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
16892 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
16893 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
16894 }
16895
16896 // We are looking for something that will have illegal types if left alone,
16897 // but that we can convert to a single instruction under MVE. For example
16898 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
16899 // or
16900 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
16901
16902 // The legal cases are:
16903 // VADDV u/s 8/16/32
16904 // VMLAV u/s 8/16/32
16905 // VADDLV u/s 32
16906 // VMLALV u/s 16/32
16907
16908 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
16909 // extend it and use v4i32 instead.
16910 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
16911 EVT AVT = A.getValueType();
16912 return any_of(ExtTypes, [&](MVT Ty) {
16913 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
16914 AVT.bitsLE(Ty);
16915 });
16916 };
16917 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
16918 EVT AVT = A.getValueType();
16919 if (!AVT.is128BitVector())
16920 A = DAG.getNode(ExtendCode, dl,
16922 128 / AVT.getVectorMinNumElements())),
16923 A);
16924 return A;
16925 };
16926 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
16927 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
16928 return SDValue();
16929 SDValue A = N0->getOperand(0);
16930 if (ExtTypeMatches(A, ExtTypes))
16931 return ExtendIfNeeded(A, ExtendCode);
16932 return SDValue();
16933 };
16934 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
16935 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
16936 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
16938 return SDValue();
16939 Mask = N0->getOperand(0);
16940 SDValue Ext = N0->getOperand(1);
16941 if (Ext->getOpcode() != ExtendCode)
16942 return SDValue();
16943 SDValue A = Ext->getOperand(0);
16944 if (ExtTypeMatches(A, ExtTypes))
16945 return ExtendIfNeeded(A, ExtendCode);
16946 return SDValue();
16947 };
16948 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16949 SDValue &A, SDValue &B) {
16950 // For a vmla we are trying to match a larger pattern:
16951 // ExtA = sext/zext A
16952 // ExtB = sext/zext B
16953 // Mul = mul ExtA, ExtB
16954 // vecreduce.add Mul
16955 // There might also be en extra extend between the mul and the addreduce, so
16956 // long as the bitwidth is high enough to make them equivalent (for example
16957 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
16958 if (ResVT != RetTy)
16959 return false;
16960 SDValue Mul = N0;
16961 if (Mul->getOpcode() == ExtendCode &&
16962 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16963 ResVT.getScalarSizeInBits())
16964 Mul = Mul->getOperand(0);
16965 if (Mul->getOpcode() != ISD::MUL)
16966 return false;
16967 SDValue ExtA = Mul->getOperand(0);
16968 SDValue ExtB = Mul->getOperand(1);
16969 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
16970 return false;
16971 A = ExtA->getOperand(0);
16972 B = ExtB->getOperand(0);
16973 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
16974 A = ExtendIfNeeded(A, ExtendCode);
16975 B = ExtendIfNeeded(B, ExtendCode);
16976 return true;
16977 }
16978 return false;
16979 };
16980 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16981 SDValue &A, SDValue &B, SDValue &Mask) {
16982 // Same as the pattern above with a select for the zero predicated lanes
16983 // ExtA = sext/zext A
16984 // ExtB = sext/zext B
16985 // Mul = mul ExtA, ExtB
16986 // N0 = select Mask, Mul, 0
16987 // vecreduce.add N0
16988 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
16990 return false;
16991 Mask = N0->getOperand(0);
16992 SDValue Mul = N0->getOperand(1);
16993 if (Mul->getOpcode() == ExtendCode &&
16994 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16995 ResVT.getScalarSizeInBits())
16996 Mul = Mul->getOperand(0);
16997 if (Mul->getOpcode() != ISD::MUL)
16998 return false;
16999 SDValue ExtA = Mul->getOperand(0);
17000 SDValue ExtB = Mul->getOperand(1);
17001 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17002 return false;
17003 A = ExtA->getOperand(0);
17004 B = ExtB->getOperand(0);
17005 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17006 A = ExtendIfNeeded(A, ExtendCode);
17007 B = ExtendIfNeeded(B, ExtendCode);
17008 return true;
17009 }
17010 return false;
17011 };
17012 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17013 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17014 // reductions. The operands are extended with MVEEXT, but as they are
17015 // reductions the lane orders do not matter. MVEEXT may be combined with
17016 // loads to produce two extending loads, or else they will be expanded to
17017 // VREV/VMOVL.
17018 EVT VT = Ops[0].getValueType();
17019 if (VT == MVT::v16i8) {
17020 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17021 "Unexpected illegal long reduction opcode");
17022 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17023
17024 SDValue Ext0 =
17025 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17026 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17027 SDValue Ext1 =
17028 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17029 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17030
17031 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17032 Ext0, Ext1);
17033 SDValue MLA1 =
17034 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17035 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17036 Ext0.getValue(1), Ext1.getValue(1));
17037 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17038 }
17039 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17040 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17041 SDValue(Node.getNode(), 1));
17042 };
17043
17044 SDValue A, B;
17045 SDValue Mask;
17046 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17047 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17048 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17049 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17050 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17051 A, B))
17052 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17053 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17054 A, B))
17055 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17056 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17057 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17058 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17059 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17060 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17061 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17062
17063 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17064 Mask))
17065 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17066 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17067 Mask))
17068 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17069 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17070 Mask))
17071 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17072 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17073 Mask))
17074 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17075 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17076 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17077 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17078 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17079 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17080 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17081
17082 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17083 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17084 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17085 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17086 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17087 return Create64bitNode(ARMISD::VADDLVs, {A});
17088 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17089 return Create64bitNode(ARMISD::VADDLVu, {A});
17090 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17091 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17092 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17093 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17094 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17095 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17096
17097 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17098 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17099 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17100 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17101 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17102 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17103 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17104 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17105 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17106 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17107 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17108 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17109 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17110 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17111
17112 // Some complications. We can get a case where the two inputs of the mul are
17113 // the same, then the output sext will have been helpfully converted to a
17114 // zext. Turn it back.
17115 SDValue Op = N0;
17116 if (Op->getOpcode() == ISD::VSELECT)
17117 Op = Op->getOperand(1);
17118 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17119 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17120 SDValue Mul = Op->getOperand(0);
17121 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17122 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17123 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17124 if (Op != N0)
17125 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17126 N0->getOperand(0), Ext, N0->getOperand(2));
17127 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17128 }
17129 }
17130
17131 return SDValue();
17132}
17133
17134// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17135// the lanes are used. Due to the reduction being commutative the shuffle can be
17136// removed.
17138 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17139 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17140 if (!Shuf || !Shuf->getOperand(1).isUndef())
17141 return SDValue();
17142
17143 // Check all elements are used once in the mask.
17144 ArrayRef<int> Mask = Shuf->getMask();
17145 APInt SetElts(Mask.size(), 0);
17146 for (int E : Mask) {
17147 if (E < 0 || E >= (int)Mask.size())
17148 return SDValue();
17149 SetElts.setBit(E);
17150 }
17151 if (!SetElts.isAllOnes())
17152 return SDValue();
17153
17154 if (N->getNumOperands() != VecOp + 1) {
17155 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17156 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17157 return SDValue();
17158 }
17159
17161 for (SDValue Op : N->ops()) {
17162 if (Op.getValueType().isVector())
17163 Ops.push_back(Op.getOperand(0));
17164 else
17165 Ops.push_back(Op);
17166 }
17167 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17168}
17169
17172 SDValue Op0 = N->getOperand(0);
17173 SDValue Op1 = N->getOperand(1);
17174 unsigned IsTop = N->getConstantOperandVal(2);
17175
17176 // VMOVNT a undef -> a
17177 // VMOVNB a undef -> a
17178 // VMOVNB undef a -> a
17179 if (Op1->isUndef())
17180 return Op0;
17181 if (Op0->isUndef() && !IsTop)
17182 return Op1;
17183
17184 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17185 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17186 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17187 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17188 Op1->getConstantOperandVal(2) == 0)
17189 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17190 Op0, Op1->getOperand(1), N->getOperand(2));
17191
17192 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17193 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17194 // into the top or bottom lanes.
17195 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17196 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17197 APInt Op0DemandedElts =
17198 IsTop ? Op1DemandedElts
17199 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17200
17201 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17202 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17203 return SDValue(N, 0);
17204 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17205 return SDValue(N, 0);
17206
17207 return SDValue();
17208}
17209
17212 SDValue Op0 = N->getOperand(0);
17213 unsigned IsTop = N->getConstantOperandVal(2);
17214
17215 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17216 APInt Op0DemandedElts =
17217 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17218 : APInt::getHighBitsSet(2, 1));
17219
17220 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17221 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17222 return SDValue(N, 0);
17223 return SDValue();
17224}
17225
17228 EVT VT = N->getValueType(0);
17229 SDValue LHS = N->getOperand(0);
17230 SDValue RHS = N->getOperand(1);
17231
17232 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17233 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17234 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17235 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17236 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17237 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17238 SDLoc DL(N);
17239 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17240 LHS.getOperand(0), RHS.getOperand(0));
17241 SDValue UndefV = LHS.getOperand(1);
17242 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17243 }
17244 return SDValue();
17245}
17246
17248 SDLoc DL(N);
17249 SDValue Op0 = N->getOperand(0);
17250 SDValue Op1 = N->getOperand(1);
17251
17252 // Turn X << -C -> X >> C and viceversa. The negative shifts can come up from
17253 // uses of the intrinsics.
17254 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17255 int ShiftAmt = C->getSExtValue();
17256 if (ShiftAmt == 0) {
17257 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17258 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17259 return SDValue();
17260 }
17261
17262 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17263 unsigned NewOpcode =
17264 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17265 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17266 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17267 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17268 return NewShift;
17269 }
17270 }
17271
17272 return SDValue();
17273}
17274
17275/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17277 DAGCombinerInfo &DCI) const {
17278 SelectionDAG &DAG = DCI.DAG;
17279 unsigned IntNo = N->getConstantOperandVal(0);
17280 switch (IntNo) {
17281 default:
17282 // Don't do anything for most intrinsics.
17283 break;
17284
17285 // Vector shifts: check for immediate versions and lower them.
17286 // Note: This is done during DAG combining instead of DAG legalizing because
17287 // the build_vectors for 64-bit vector element shift counts are generally
17288 // not legal, and it is hard to see their values after they get legalized to
17289 // loads from a constant pool.
17290 case Intrinsic::arm_neon_vshifts:
17291 case Intrinsic::arm_neon_vshiftu:
17292 case Intrinsic::arm_neon_vrshifts:
17293 case Intrinsic::arm_neon_vrshiftu:
17294 case Intrinsic::arm_neon_vrshiftn:
17295 case Intrinsic::arm_neon_vqshifts:
17296 case Intrinsic::arm_neon_vqshiftu:
17297 case Intrinsic::arm_neon_vqshiftsu:
17298 case Intrinsic::arm_neon_vqshiftns:
17299 case Intrinsic::arm_neon_vqshiftnu:
17300 case Intrinsic::arm_neon_vqshiftnsu:
17301 case Intrinsic::arm_neon_vqrshiftns:
17302 case Intrinsic::arm_neon_vqrshiftnu:
17303 case Intrinsic::arm_neon_vqrshiftnsu: {
17304 EVT VT = N->getOperand(1).getValueType();
17305 int64_t Cnt;
17306 unsigned VShiftOpc = 0;
17307
17308 switch (IntNo) {
17309 case Intrinsic::arm_neon_vshifts:
17310 case Intrinsic::arm_neon_vshiftu:
17311 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17312 VShiftOpc = ARMISD::VSHLIMM;
17313 break;
17314 }
17315 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17316 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17317 : ARMISD::VSHRuIMM);
17318 break;
17319 }
17320 return SDValue();
17321
17322 case Intrinsic::arm_neon_vrshifts:
17323 case Intrinsic::arm_neon_vrshiftu:
17324 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17325 break;
17326 return SDValue();
17327
17328 case Intrinsic::arm_neon_vqshifts:
17329 case Intrinsic::arm_neon_vqshiftu:
17330 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17331 break;
17332 return SDValue();
17333
17334 case Intrinsic::arm_neon_vqshiftsu:
17335 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17336 break;
17337 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17338
17339 case Intrinsic::arm_neon_vrshiftn:
17340 case Intrinsic::arm_neon_vqshiftns:
17341 case Intrinsic::arm_neon_vqshiftnu:
17342 case Intrinsic::arm_neon_vqshiftnsu:
17343 case Intrinsic::arm_neon_vqrshiftns:
17344 case Intrinsic::arm_neon_vqrshiftnu:
17345 case Intrinsic::arm_neon_vqrshiftnsu:
17346 // Narrowing shifts require an immediate right shift.
17347 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17348 break;
17349 llvm_unreachable("invalid shift count for narrowing vector shift "
17350 "intrinsic");
17351
17352 default:
17353 llvm_unreachable("unhandled vector shift");
17354 }
17355
17356 switch (IntNo) {
17357 case Intrinsic::arm_neon_vshifts:
17358 case Intrinsic::arm_neon_vshiftu:
17359 // Opcode already set above.
17360 break;
17361 case Intrinsic::arm_neon_vrshifts:
17362 VShiftOpc = ARMISD::VRSHRsIMM;
17363 break;
17364 case Intrinsic::arm_neon_vrshiftu:
17365 VShiftOpc = ARMISD::VRSHRuIMM;
17366 break;
17367 case Intrinsic::arm_neon_vrshiftn:
17368 VShiftOpc = ARMISD::VRSHRNIMM;
17369 break;
17370 case Intrinsic::arm_neon_vqshifts:
17371 VShiftOpc = ARMISD::VQSHLsIMM;
17372 break;
17373 case Intrinsic::arm_neon_vqshiftu:
17374 VShiftOpc = ARMISD::VQSHLuIMM;
17375 break;
17376 case Intrinsic::arm_neon_vqshiftsu:
17377 VShiftOpc = ARMISD::VQSHLsuIMM;
17378 break;
17379 case Intrinsic::arm_neon_vqshiftns:
17380 VShiftOpc = ARMISD::VQSHRNsIMM;
17381 break;
17382 case Intrinsic::arm_neon_vqshiftnu:
17383 VShiftOpc = ARMISD::VQSHRNuIMM;
17384 break;
17385 case Intrinsic::arm_neon_vqshiftnsu:
17386 VShiftOpc = ARMISD::VQSHRNsuIMM;
17387 break;
17388 case Intrinsic::arm_neon_vqrshiftns:
17389 VShiftOpc = ARMISD::VQRSHRNsIMM;
17390 break;
17391 case Intrinsic::arm_neon_vqrshiftnu:
17392 VShiftOpc = ARMISD::VQRSHRNuIMM;
17393 break;
17394 case Intrinsic::arm_neon_vqrshiftnsu:
17395 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17396 break;
17397 }
17398
17399 SDLoc dl(N);
17400 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17401 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17402 }
17403
17404 case Intrinsic::arm_neon_vshiftins: {
17405 EVT VT = N->getOperand(1).getValueType();
17406 int64_t Cnt;
17407 unsigned VShiftOpc = 0;
17408
17409 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17410 VShiftOpc = ARMISD::VSLIIMM;
17411 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17412 VShiftOpc = ARMISD::VSRIIMM;
17413 else {
17414 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17415 }
17416
17417 SDLoc dl(N);
17418 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17419 N->getOperand(1), N->getOperand(2),
17420 DAG.getConstant(Cnt, dl, MVT::i32));
17421 }
17422
17423 case Intrinsic::arm_neon_vqrshifts:
17424 case Intrinsic::arm_neon_vqrshiftu:
17425 // No immediate versions of these to check for.
17426 break;
17427
17428 case Intrinsic::arm_neon_vbsl: {
17429 SDLoc dl(N);
17430 return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17431 N->getOperand(2), N->getOperand(3));
17432 }
17433 case Intrinsic::arm_mve_vqdmlah:
17434 case Intrinsic::arm_mve_vqdmlash:
17435 case Intrinsic::arm_mve_vqrdmlah:
17436 case Intrinsic::arm_mve_vqrdmlash:
17437 case Intrinsic::arm_mve_vmla_n_predicated:
17438 case Intrinsic::arm_mve_vmlas_n_predicated:
17439 case Intrinsic::arm_mve_vqdmlah_predicated:
17440 case Intrinsic::arm_mve_vqdmlash_predicated:
17441 case Intrinsic::arm_mve_vqrdmlah_predicated:
17442 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17443 // These intrinsics all take an i32 scalar operand which is narrowed to the
17444 // size of a single lane of the vector type they return. So we don't need
17445 // any bits of that operand above that point, which allows us to eliminate
17446 // uxth/sxth.
17447 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17448 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17449 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17450 return SDValue();
17451 break;
17452 }
17453
17454 case Intrinsic::arm_mve_minv:
17455 case Intrinsic::arm_mve_maxv:
17456 case Intrinsic::arm_mve_minav:
17457 case Intrinsic::arm_mve_maxav:
17458 case Intrinsic::arm_mve_minv_predicated:
17459 case Intrinsic::arm_mve_maxv_predicated:
17460 case Intrinsic::arm_mve_minav_predicated:
17461 case Intrinsic::arm_mve_maxav_predicated: {
17462 // These intrinsics all take an i32 scalar operand which is narrowed to the
17463 // size of a single lane of the vector type they take as the other input.
17464 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17465 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17466 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17467 return SDValue();
17468 break;
17469 }
17470
17471 case Intrinsic::arm_mve_addv: {
17472 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17473 // which allow PerformADDVecReduce to turn it into VADDLV when possible.
17474 bool Unsigned = N->getConstantOperandVal(2);
17475 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17476 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17477 }
17478
17479 case Intrinsic::arm_mve_addlv:
17480 case Intrinsic::arm_mve_addlv_predicated: {
17481 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17482 // which recombines the two outputs into an i64
17483 bool Unsigned = N->getConstantOperandVal(2);
17484 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17485 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17486 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17487
17489 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17490 if (i != 2) // skip the unsigned flag
17491 Ops.push_back(N->getOperand(i));
17492
17493 SDLoc dl(N);
17494 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17495 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17496 val.getValue(1));
17497 }
17498 }
17499
17500 return SDValue();
17501}
17502
17503/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17504/// lowers them. As with the vector shift intrinsics, this is done during DAG
17505/// combining instead of DAG legalizing because the build_vectors for 64-bit
17506/// vector element shift counts are generally not legal, and it is hard to see
17507/// their values after they get legalized to loads from a constant pool.
17510 const ARMSubtarget *ST) {
17511 SelectionDAG &DAG = DCI.DAG;
17512 EVT VT = N->getValueType(0);
17513
17514 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17515 N->getOperand(0)->getOpcode() == ISD::AND &&
17516 N->getOperand(0)->hasOneUse()) {
17517 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17518 return SDValue();
17519 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17520 // usually show up because instcombine prefers to canonicalize it to
17521 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17522 // out of GEP lowering in some cases.
17523 SDValue N0 = N->getOperand(0);
17524 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17525 if (!ShiftAmtNode)
17526 return SDValue();
17527 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17528 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17529 if (!AndMaskNode)
17530 return SDValue();
17531 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17532 // Don't transform uxtb/uxth.
17533 if (AndMask == 255 || AndMask == 65535)
17534 return SDValue();
17535 if (isMask_32(AndMask)) {
17536 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17537 if (MaskedBits > ShiftAmt) {
17538 SDLoc DL(N);
17539 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17540 DAG.getConstant(MaskedBits, DL, MVT::i32));
17541 return DAG.getNode(
17542 ISD::SRL, DL, MVT::i32, SHL,
17543 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17544 }
17545 }
17546 }
17547
17548 // Nothing to be done for scalar shifts.
17549 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17550 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17551 return SDValue();
17552 if (ST->hasMVEIntegerOps())
17553 return SDValue();
17554
17555 int64_t Cnt;
17556
17557 switch (N->getOpcode()) {
17558 default: llvm_unreachable("unexpected shift opcode");
17559
17560 case ISD::SHL:
17561 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17562 SDLoc dl(N);
17563 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17564 DAG.getConstant(Cnt, dl, MVT::i32));
17565 }
17566 break;
17567
17568 case ISD::SRA:
17569 case ISD::SRL:
17570 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17571 unsigned VShiftOpc =
17572 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17573 SDLoc dl(N);
17574 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17575 DAG.getConstant(Cnt, dl, MVT::i32));
17576 }
17577 }
17578 return SDValue();
17579}
17580
17581// Look for a sign/zero/fpextend extend of a larger than legal load. This can be
17582// split into multiple extending loads, which are simpler to deal with than an
17583// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17584// to convert the type to an f32.
17586 SDValue N0 = N->getOperand(0);
      // Bail unless the extend's input is a plain (non-extending, unindexed,
      // simple) load with this extend as its only user.
      // NOTE(review): the LoadSDNode cast producing `LD` is on an elided line.
17587 if (N0.getOpcode() != ISD::LOAD)
17588 return SDValue();
17590 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17591 LD->getExtensionType() != ISD::NON_EXTLOAD)
17592 return SDValue();
17593 EVT FromVT = LD->getValueType(0);
17594 EVT ToVT = N->getValueType(0);
17595 if (!ToVT.isVector())
17596 return SDValue();
17598 EVT ToEltVT = ToVT.getVectorElementType();
17599 EVT FromEltVT = FromVT.getVectorElementType();
17600
      // NumElements is the element count of each narrower load produced by the
      // split. Only the i8->i32 and f16->f32 cases below are handled; the
      // split must divide the original vector evenly.
17601 unsigned NumElements = 0;
17602 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17603 NumElements = 4;
17604 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17605 NumElements = 4;
17606 if (NumElements == 0 ||
17607 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17608 FromVT.getVectorNumElements() % NumElements != 0 ||
17609 !isPowerOf2_32(NumElements))
17610 return SDValue();
17611
17612 LLVMContext &C = *DAG.getContext();
17613 SDLoc DL(LD);
17614 // Details about the old load
17615 SDValue Ch = LD->getChain();
17616 SDValue BasePtr = LD->getBasePtr();
17617 Align Alignment = LD->getBaseAlign();
17618 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17619 AAMDNodes AAInfo = LD->getAAInfo();
17620
      // The new loads extend from an integer version of the element type; for
      // the f16 case the fp conversion is done separately with VCVTL below.
17621 ISD::LoadExtType NewExtType =
17622 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17623 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17624 EVT NewFromVT = EVT::getVectorVT(
17625 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17626 EVT NewToVT = EVT::getVectorVT(
17627 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17628
      // Emit one extending load per NumElements-sized chunk at increasing
      // offsets. NOTE(review): the `Loads`/`Chains` SmallVector declarations
      // are on elided lines.
17631 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17632 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17633 SDValue NewPtr =
17634 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17635
17636 SDValue NewLoad =
17637 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17638 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17639 Alignment, MMOFlags, AAInfo);
17640 Loads.push_back(NewLoad);
17641 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17642 }
17643
17644 // Float truncs need to extended with VCVTB's into their floating point types.
17645 if (FromEltVT == MVT::f16) {
17647
17648 for (unsigned i = 0; i < Loads.size(); i++) {
17649 SDValue LoadBC =
17650 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17651 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17652 DAG.getConstant(0, DL, MVT::i32));
17653 Extends.push_back(FPExt);
17654 }
17655
17656 Loads = Extends;
17657 }
17658
      // Tie the partial loads' chains together and redirect users of the old
      // load's chain before returning the concatenated result.
17659 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17660 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17661 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17662}
17663
17664/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17665/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17667 const ARMSubtarget *ST) {
17668 SDValue N0 = N->getOperand(0);
17669
17670 // Check for sign- and zero-extensions of vector extract operations of 8- and
17671 // 16-bit vector elements. NEON and MVE support these directly. They are
17672 // handled during DAG combining because type legalization will promote them
17673 // to 32-bit types and it is messy to recognize the operations after that.
      // NOTE(review): the EXTRACT_VECTOR_ELT opcode check completing this
      // condition is on an elided line.
17674 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17676 SDValue Vec = N0.getOperand(0);
17677 SDValue Lane = N0.getOperand(1);
17678 EVT VT = N->getValueType(0);
17679 EVT EltVT = N0.getValueType();
17680 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17681
      // Only fold for legal vector types, i32 results, and constant lanes,
      // which is what the VGETLANE nodes support.
17682 if (VT == MVT::i32 &&
17683 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17684 TLI.isTypeLegal(Vec.getValueType()) &&
17685 isa<ConstantSDNode>(Lane)) {
17686
17687 unsigned Opc = 0;
17688 switch (N->getOpcode()) {
17689 default: llvm_unreachable("unexpected opcode");
17690 case ISD::SIGN_EXTEND:
17691 Opc = ARMISD::VGETLANEs;
17692 break;
17693 case ISD::ZERO_EXTEND:
      // ANY_EXTEND can use the zero-extending form since any value in the
      // high bits is acceptable.
17694 case ISD::ANY_EXTEND:
17695 Opc = ARMISD::VGETLANEu;
17696 break;
17697 }
17698 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17699 }
17700 }
17701
      // On MVE, a larger-than-legal extending load can be split into multiple
      // extending loads (see PerformSplittingToWideningLoad above).
17702 if (ST->hasMVEIntegerOps())
17703 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17704 return NewLoad;
17705
17706 return SDValue();
17707}
17708
17710 const ARMSubtarget *ST) {
      // For MVE float ops, try splitting an fpext of a wide load into multiple
      // extending loads plus VCVTL conversions.
17711 if (ST->hasMVEFloatOps())
17712 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17713 return NewLoad;
17714
17715 return SDValue();
17716}
17717
17718// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17719// constant bounds.
17721 const ARMSubtarget *Subtarget) {
      // SSAT/USAT need ARMv6 (ARM mode) or Thumb2.
17722 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17723 !Subtarget->isThumb2())
17724 return SDValue();
17725
17726 EVT VT = Op.getValueType();
17727 SDValue Op0 = Op.getOperand(0);
17728
      // Expect an i32 smin/smax pair with constant bounds.
      // NOTE(review): the constant check on Op0's bound operand is on an
      // elided line.
17729 if (VT != MVT::i32 ||
17730 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17731 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17733 return SDValue();
17734
      // Canonicalize so Min is the outer smin and Max the inner smax.
17735 SDValue Min = Op;
17736 SDValue Max = Op0;
17737 SDValue Input = Op0.getOperand(0);
17738 if (Min.getOpcode() == ISD::SMAX)
17739 std::swap(Min, Max);
17740
17741 APInt MinC = Min.getConstantOperandAPInt(1);
17742 APInt MaxC = Max.getConstantOperandAPInt(1);
17743
      // The upper bound must be 2^k - 1 for a k-bit saturate.
17744 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17745 !(MinC + 1).isPowerOf2())
17746 return SDValue();
17747
17748 SDLoc DL(Op);
      // Bounds [-2^k, 2^k - 1] -> SSAT; bounds [0, 2^k - 1] -> USAT. The
      // operand is the number of low bits kept (countr_one of MinC).
17749 if (MinC == ~MaxC)
17750 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17751 DAG.getConstant(MinC.countr_one(), DL, VT));
17752 if (MaxC == 0)
17753 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17754 DAG.getConstant(MinC.countr_one(), DL, VT));
17755
17756 return SDValue();
17757}
17758
17759/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17760/// saturates.
17762 const ARMSubtarget *ST) {
17763 EVT VT = N->getValueType(0);
17764 SDValue N0 = N->getOperand(0);
17765
      // Scalar i32 min/max pairs may instead form SSAT/USAT.
17766 if (VT == MVT::i32)
17767 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17768
17769 if (!ST->hasMVEIntegerOps())
17770 return SDValue();
17771
17772 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17773 return V;
17774
17775 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17776 return SDValue();
17777
      // Matches smin(smax(x, -2^(k-1)), 2^(k-1)-1) in either nesting order,
      // i.e. a signed saturate to the half-width element type.
17778 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17779 // Check one is a smin and the other is a smax
17780 if (Min->getOpcode() != ISD::SMIN)
17781 std::swap(Min, Max);
17782 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17783 return false;
17784
17785 APInt SaturateC;
17786 if (VT == MVT::v4i32)
17787 SaturateC = APInt(32, (1 << 15) - 1, true);
17788 else //if (VT == MVT::v8i16)
17789 SaturateC = APInt(16, (1 << 7) - 1, true);
17790
17791 APInt MinC, MaxC;
17792 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17793 MinC != SaturateC)
17794 return false;
17795 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17796 MaxC != ~SaturateC)
17797 return false;
17798 return true;
17799 };
17800
17801 if (IsSignedSaturate(N, N0.getNode())) {
17802 SDLoc DL(N);
17803 MVT ExtVT, HalfVT;
17804 if (VT == MVT::v4i32) {
17805 HalfVT = MVT::v8i16;
17806 ExtVT = MVT::v4i16;
17807 } else { // if (VT == MVT::v8i16)
17808 HalfVT = MVT::v16i8;
17809 ExtVT = MVT::v8i8;
17810 }
17811
17812 // Create a VQMOVNB with undef top lanes, then signed extended into the top
17813 // half. That extend will hopefully be removed if only the bottom bits are
17814 // demanded (though a truncating store, for example).
17815 SDValue VQMOVN =
17816 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
17817 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
17818 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17819 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
17820 DAG.getValueType(ExtVT));
17821 }
17822
      // Matches umin(x, 2^k - 1): an unsigned saturate to the half-width
      // element type.
17823 auto IsUnsignedSaturate = [&](SDNode *Min) {
17824 // For unsigned, we just need to check for <= 0xffff
17825 if (Min->getOpcode() != ISD::UMIN)
17826 return false;
17827
17828 APInt SaturateC;
17829 if (VT == MVT::v4i32)
17830 SaturateC = APInt(32, (1 << 16) - 1, true);
17831 else //if (VT == MVT::v8i16)
17832 SaturateC = APInt(16, (1 << 8) - 1, true);
17833
17834 APInt MinC;
17835 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17836 MinC != SaturateC)
17837 return false;
17838 return true;
17839 };
17840
17841 if (IsUnsignedSaturate(N)) {
17842 SDLoc DL(N);
17843 MVT HalfVT;
17844 unsigned ExtConst;
17845 if (VT == MVT::v4i32) {
17846 HalfVT = MVT::v8i16;
17847 ExtConst = 0x0000FFFF;
17848 } else { //if (VT == MVT::v8i16)
17849 HalfVT = MVT::v16i8;
17850 ExtConst = 0x00FF;
17851 }
17852
17853 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
17854 // an AND. That extend will hopefully be removed if only the bottom bits are
17855 // demanded (though a truncating store, for example).
17856 SDValue VQMOVN =
17857 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
17858 DAG.getConstant(0, DL, MVT::i32));
17859 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17860 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
17861 DAG.getConstant(ExtConst, DL, VT));
17862 }
17863
17864 return SDValue();
17865}
17866
      // Returns a pointer to the constant's APInt if it is a power of two,
      // nullptr otherwise. NOTE(review): the ConstantSDNode dyn_cast producing
      // `C` is on an elided line.
17869 if (!C)
17870 return nullptr;
17871 const APInt *CV = &C->getAPIntValue();
17872 return CV->isPowerOf2() ? CV : nullptr;
17873}
17874
17876 // If we have a CMOV, OR and AND combination such as:
17877 // if (x & CN)
17878 // y |= CM;
17879 //
17880 // And:
17881 // * CN is a single bit;
17882 // * All bits covered by CM are known zero in y
17883 //
17884 // Then we can convert this into a sequence of BFI instructions. This will
17885 // always be a win if CM is a single bit, will always be no worse than the
17886 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
17887 // three bits (due to the extra IT instruction).
17888
17889 SDValue Op0 = CMOV->getOperand(0);
17890 SDValue Op1 = CMOV->getOperand(1);
17891 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
17892 SDValue CmpZ = CMOV->getOperand(3);
17893
17894 // The compare must be against zero.
17895 if (!isNullConstant(CmpZ->getOperand(1)))
17896 return SDValue();
17897
17898 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
17899 SDValue And = CmpZ->getOperand(0);
17900 if (And->getOpcode() != ISD::AND)
17901 return SDValue();
      // CN: the tested bit; must be a single bit (power of two).
17902 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
17903 if (!AndC)
17904 return SDValue();
17905 SDValue X = And->getOperand(0);
17906
17907 if (CC == ARMCC::EQ) {
17908 // We're performing an "equal to zero" compare. Swap the operands so we
17909 // canonicalize on a "not equal to zero" compare.
17910 std::swap(Op0, Op1);
17911 } else {
17912 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
17913 }
17914
      // The selected-when-bit-set value must be (y | CM).
17915 if (Op1->getOpcode() != ISD::OR)
17916 return SDValue();
17917
      // NOTE(review): the definition of `OrC` (the constant operand of the OR)
      // is on an elided line.
17919 if (!OrC)
17920 return SDValue();
17921 SDValue Y = Op1->getOperand(0);
17922
17923 if (Op0 != Y)
17924 return SDValue();
17925
17926 // Now, is it profitable to continue?
17927 APInt OrCI = OrC->getAPIntValue();
17928 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
17929 if (OrCI.popcount() > Heuristic)
17930 return SDValue();
17931
17932 // Lastly, can we determine that the bits defined by OrCI
17933 // are zero in Y?
17934 KnownBits Known = DAG.computeKnownBits(Y);
17935 if ((OrCI & Known.Zero) != OrCI)
17936 return SDValue();
17937
17938 // OK, we can do the combine.
17939 SDValue V = Y;
17940 SDLoc dl(X);
17941 EVT VT = X.getValueType();
17942 unsigned BitInX = AndC->logBase2();
17943
17944 if (BitInX != 0) {
17945 // We must shift X first.
17946 X = DAG.getNode(ISD::SRL, dl, VT, X,
17947 DAG.getConstant(BitInX, dl, VT));
17948 }
17949
      // Insert the (shifted) tested bit into each set bit position of CM with
      // one BFI per bit.
17950 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
17951 BitInY < NumActiveBits; ++BitInY) {
17952 if (OrCI[BitInY] == 0)
17953 continue;
17954 APInt Mask(VT.getSizeInBits(), 0);
17955 Mask.setBit(BitInY);
17956 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
17957 // Confusingly, the operand is an *inverted* mask.
17958 DAG.getConstant(~Mask, dl, VT));
17959 }
17960
17961 return V;
17962}
17963
17964// Given N, the value controlling the conditional branch, search for the loop
17965// intrinsic, returning it, along with how the value is used. We need to handle
17966// patterns such as the following:
17967// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
17968// (brcond (setcc (loop.decrement), 0, eq), exit)
17969// (brcond (setcc (loop.decrement), 0, ne), header)
17971 bool &Negate) {
17972 switch (N->getOpcode()) {
17973 default:
17974 break;
17975 case ISD::XOR: {
      // (xor cond, 1) logically negates the condition; record the flip and
      // keep searching through the operand.
17976 if (!isa<ConstantSDNode>(N.getOperand(1)))
17977 return SDValue();
17978 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
17979 return SDValue();
17980 Negate = !Negate;
17981 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
17982 }
17983 case ISD::SETCC: {
      // Record the compared-against constant (0 or 1 only) and condition
      // code, then search the setcc's input.
17984 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
17985 if (!Const)
17986 return SDValue();
17987 if (Const->isZero())
17988 Imm = 0;
17989 else if (Const->isOne())
17990 Imm = 1;
17991 else
17992 return SDValue();
17993 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
17994 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
17995 }
      // NOTE(review): the case label for the intrinsic node (line 17996) is
      // elided; this arm accepts only the two hwloop intrinsics.
17997 unsigned IntOp = N.getConstantOperandVal(1);
17998 if (IntOp != Intrinsic::test_start_loop_iterations &&
17999 IntOp != Intrinsic::loop_decrement_reg)
18000 return SDValue();
18001 return N;
18002 }
18003 }
18004 return SDValue();
18005}
18006
18009 const ARMSubtarget *ST) {
18010
18011 // The hwloop intrinsics that we're interested are used for control-flow,
18012 // either for entering or exiting the loop:
18013 // - test.start.loop.iterations will test whether its operand is zero. If it
18014 // is zero, the proceeding branch should not enter the loop.
18015 // - loop.decrement.reg also tests whether its operand is zero. If it is
18016 // zero, the proceeding branch should not branch back to the beginning of
18017 // the loop.
18018 // So here, we need to check that how the brcond is using the result of each
18019 // of the intrinsics to ensure that we're branching to the right place at the
18020 // right time.
18021
18022 ISD::CondCode CC;
18023 SDValue Cond;
18024 int Imm = 1;
18025 bool Negate = false;
18026 SDValue Chain = N->getOperand(0);
18027 SDValue Dest;
18028
      // Normalize BRCOND and BR_CC into (Cond, CC, Imm, Dest) form.
18029 if (N->getOpcode() == ISD::BRCOND) {
18030 CC = ISD::SETEQ;
18031 Cond = N->getOperand(1);
18032 Dest = N->getOperand(2);
18033 } else {
18034 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18035 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18036 Cond = N->getOperand(2);
18037 Dest = N->getOperand(4);
18038 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18039 if (!Const->isOne() && !Const->isZero())
18040 return SDValue();
18041 Imm = Const->getZExtValue();
18042 } else
18043 return SDValue();
18044 }
18045
18046 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18047 if (!Int)
18048 return SDValue();
18049
18050 if (Negate)
18051 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18052
      // Classify whether the branch is taken when the counter is zero
      // (IsTrueIfZero) or non-zero (IsFalseIfZero).
18053 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18054 return (CC == ISD::SETEQ && Imm == 0) ||
18055 (CC == ISD::SETNE && Imm == 1) ||
18056 (CC == ISD::SETLT && Imm == 1) ||
18057 (CC == ISD::SETULT && Imm == 1);
18058 };
18059
18060 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18061 return (CC == ISD::SETEQ && Imm == 1) ||
18062 (CC == ISD::SETNE && Imm == 0) ||
18063 (CC == ISD::SETGT && Imm == 0) ||
18064 (CC == ISD::SETUGT && Imm == 0) ||
18065 (CC == ISD::SETGE && Imm == 1) ||
18066 (CC == ISD::SETUGE && Imm == 1);
18067 };
18068
18069 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18070 "unsupported condition");
18071
18072 SDLoc dl(Int);
18073 SelectionDAG &DAG = DCI.DAG;
18074 SDValue Elements = Int.getOperand(2);
18075 unsigned IntOp = Int->getConstantOperandVal(1);
      // The conditional branch must be followed by exactly one unconditional
      // branch, whose target is the "other" destination.
18076 assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
18077 "expected single br user");
18078 SDNode *Br = *N->user_begin();
18079 SDValue OtherTarget = Br->getOperand(1);
18080
18081 // Update the unconditional branch to branch to the given Dest.
18082 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18083 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18084 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18085 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18086 };
18087
18088 if (IntOp == Intrinsic::test_start_loop_iterations) {
18089 SDValue Res;
18090 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18091 // We expect this 'instruction' to branch when the counter is zero.
18092 if (IsTrueIfZero(CC, Imm)) {
18093 SDValue Ops[] = {Chain, Setup, Dest};
18094 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18095 } else {
18096 // The logic is the reverse of what we need for WLS, so find the other
18097 // basic block target: the target of the proceeding br.
18098 UpdateUncondBr(Br, Dest, DAG);
18099
18100 SDValue Ops[] = {Chain, Setup, OtherTarget};
18101 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18102 }
18103 // Update LR count to the new value
18104 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18105 // Update chain
18106 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18107 return Res;
18108 } else {
      // loop.decrement.reg: build a LOOP_DEC + LE (loop-end) pair.
18109 SDValue Size =
18110 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18111 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18112 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18113 DAG.getVTList(MVT::i32, MVT::Other), Args);
18114 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18115
18116 // We expect this instruction to branch when the count is not zero.
18117 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18118
18119 // Update the unconditional branch to target the loop preheader if we've
18120 // found the condition has been reversed.
18121 if (Target == OtherTarget)
18122 UpdateUncondBr(Br, Dest, DAG);
18123
18124 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18125 SDValue(LoopDec.getNode(), 1), Chain);
18126
18127 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18128 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18129 }
18130 return SDValue();
18131}
18132
18133/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18134SDValue
18136 SDValue Cmp = N->getOperand(3);
18137 if (Cmp.getOpcode() != ARMISD::CMPZ)
18138 // Only looking at NE cases.
18139 return SDValue();
18140
18141 SDLoc dl(N);
18142 SDValue LHS = Cmp.getOperand(0);
18143 SDValue RHS = Cmp.getOperand(1);
18144 SDValue Chain = N->getOperand(0);
18145 SDValue BB = N->getOperand(1);
18146 SDValue ARMcc = N->getOperand(2);
      // NOTE(review): the extraction of the condition code `CC` from ARMcc
      // (line 18147) is elided.
18148
18149 // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
18150 // -> (brcond Chain BB CC Flags)
18151 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18152 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18153 LHS->getOperand(0)->hasOneUse() &&
18154 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18155 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18156 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
      // Branch directly on the inner CMOV's condition and flags, bypassing the
      // materialized 0/1 value.
18157 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18158 LHS->getOperand(0)->getOperand(2),
18159 LHS->getOperand(0)->getOperand(3));
18160 }
18161
18162 return SDValue();
18163}
18164
18165/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18166SDValue
18168 SDValue Cmp = N->getOperand(3);
18169 if (Cmp.getOpcode() != ARMISD::CMPZ)
18170 // Only looking at EQ and NE cases.
18171 return SDValue();
18172
18173 EVT VT = N->getValueType(0);
18174 SDLoc dl(N);
18175 SDValue LHS = Cmp.getOperand(0);
18176 SDValue RHS = Cmp.getOperand(1);
18177 SDValue FalseVal = N->getOperand(0);
18178 SDValue TrueVal = N->getOperand(1);
18179 SDValue ARMcc = N->getOperand(2);
      // NOTE(review): the extraction of the condition code `CC` from ARMcc
      // (line 18180) is elided.
18181
18182 // BFI is only available on V6T2+.
18183 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
      // NOTE(review): the call producing `R` (PerformCMOVToBFICombine, line
      // 18184) is elided.
18185 if (R)
18186 return R;
18187 }
18188
18189 // Simplify
18190 // mov r1, r0
18191 // cmp r1, x
18192 // mov r0, y
18193 // moveq r0, x
18194 // to
18195 // cmp r0, x
18196 // movne r0, y
18197 //
18198 // mov r1, r0
18199 // cmp r1, x
18200 // mov r0, x
18201 // movne r0, y
18202 // to
18203 // cmp r0, x
18204 // movne r0, y
18205 /// FIXME: Turn this into a target neutral optimization?
18206 SDValue Res;
18207 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18208 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18209 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18210 SDValue ARMcc;
18211 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18212 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18213 }
18214
18215 // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
18216 // -> (cmov F T CC Flags)
18217 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18218 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18219 isNullConstant(RHS)) {
18220 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18221 LHS->getOperand(2), LHS->getOperand(3));
18222 }
18223
18224 if (!VT.isInteger())
18225 return SDValue();
18226
18227 // Fold away an unneccessary CMPZ/CMOV
18228 // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18229 // if C1==EQ -> CMOV A, B, C2, D
18230 // if C1==NE -> CMOV A, B, NOT(C2), D
18231 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18232 N->getConstantOperandVal(2) == ARMCC::NE) {
      // NOTE(review): the declaration of `Cond` (line 18233) and the
      // condition-inversion statement (line 18236) are elided.
18234 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
18235 if (N->getConstantOperandVal(2) == ARMCC::NE)
18237 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18238 N->getOperand(1),
18239 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
18240 }
18241 }
18242
18243 // Materialize a boolean comparison for integers so we can avoid branching.
18244 if (isNullConstant(FalseVal)) {
18245 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18246 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18247 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18248 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18249 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18250 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18251 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18252 DAG.getConstant(5, dl, MVT::i32));
18253 } else {
18254 // CMOV 0, 1, ==, (CMPZ x, y) ->
18255 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18256 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18257 //
18258 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18259 // x != y. In other words, a carry C == 1 when x == y, C == 0
18260 // otherwise.
18261 // The final UADDO_CARRY computes
18262 // x - y + (0 - (x - y)) + C == C
18263 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18264 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18265 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18266 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18267 // actually.
18268 SDValue Carry =
18269 DAG.getNode(ISD::SUB, dl, MVT::i32,
18270 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18271 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18272 }
18273 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18274 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18275 // This seems pointless but will allow us to combine it further below.
18276 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18277 SDValue Sub =
18278 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18279 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18280 Sub.getValue(1));
18281 FalseVal = Sub;
18282 }
18283 } else if (isNullConstant(TrueVal)) {
18284 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18285 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18286 // This seems pointless but will allow us to combine it further below
18287 // Note that we change == for != as this is the dual for the case above.
18288 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18289 SDValue Sub =
18290 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18291 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18292 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18293 Sub.getValue(1));
18294 FalseVal = Sub;
18295 }
18296 }
18297
18298 // On Thumb1, the DAG above may be further combined if z is a power of 2
18299 // (z == 2 ^ K).
18300 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18301 // t1 = (USUBO (SUB x, y), 1)
18302 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18303 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18304 //
18305 // This also handles the special case of comparing against zero; it's
18306 // essentially, the same pattern, except there's no SUBC:
18307 // CMOV x, z, !=, (CMPZ x, 0) ->
18308 // t1 = (USUBO x, 1)
18309 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18310 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18311 const APInt *TrueConst;
18312 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18313 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18314 FalseVal.getOperand(1) == RHS) ||
18315 (FalseVal == LHS && isNullConstant(RHS))) &&
18316 (TrueConst = isPowerOf2Constant(TrueVal))) {
18317 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18318 unsigned ShiftAmount = TrueConst->logBase2();
18319 if (ShiftAmount)
18320 TrueVal = DAG.getConstant(1, dl, VT);
18321 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18322 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18323 Subc.getValue(1));
18324
18325 if (ShiftAmount)
18326 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18327 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18328 }
18329
18330 if (Res.getNode()) {
18331 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18332 // Capture demanded bits information that would be otherwise lost.
18333 if (Known.Zero == 0xfffffffe)
18334 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18335 DAG.getValueType(MVT::i1));
18336 else if (Known.Zero == 0xffffff00)
18337 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18338 DAG.getValueType(MVT::i8));
18339 else if (Known.Zero == 0xffff0000)
18340 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18341 DAG.getValueType(MVT::i16));
18342 }
18343
18344 return Res;
18345}
18346
18349 const ARMSubtarget *ST) {
18350 SelectionDAG &DAG = DCI.DAG;
18351 SDValue Src = N->getOperand(0);
18352 EVT DstVT = N->getValueType(0);
18353
18354 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18355 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18356 EVT SrcVT = Src.getValueType();
18357 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18358 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18359 }
18360
18361 // We may have a bitcast of something that has already had this bitcast
18362 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18363 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18364 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18365 Src.getValueType().getScalarSizeInBits())
18366 Src = Src.getOperand(0);
18367
18368 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18369 // would be generated is at least the width of the element type.
18370 EVT SrcVT = Src.getValueType();
18371 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18372 Src.getOpcode() == ARMISD::VMVNIMM ||
18373 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18374 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18375 DAG.getDataLayout().isBigEndian())
18376 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18377
18378 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18379 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18380 return R;
18381
18382 return SDValue();
18383}
18384
18385// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18386// node into stack operations after legalizeOps.
18389 SelectionDAG &DAG = DCI.DAG;
18390 EVT VT = N->getValueType(0);
18391 SDLoc DL(N);
18392
18393 // MVETrunc(Undef, Undef) -> Undef
18394 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18395 return DAG.getUNDEF(VT);
18396
18397 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18398 if (N->getNumOperands() == 2 &&
18399 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18400 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18401 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18402 N->getOperand(0).getOperand(1),
18403 N->getOperand(1).getOperand(0),
18404 N->getOperand(1).getOperand(1));
18405
18406 // MVETrunc(shuffle, shuffle) -> VMOVN
18407 if (N->getNumOperands() == 2 &&
18408 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18409 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18410 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18411 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18412
18413 if (S0->getOperand(0) == S1->getOperand(0) &&
18414 S0->getOperand(1) == S1->getOperand(1)) {
18415 // Construct complete shuffle mask
18416 SmallVector<int, 8> Mask(S0->getMask());
18417 Mask.append(S1->getMask().begin(), S1->getMask().end());
18418
      // isVMOVNTruncMask's second mask flavour swaps which source feeds the
      // top/bottom lanes, hence the operand order differs between the two.
18419 if (isVMOVNTruncMask(Mask, VT, false))
18420 return DAG.getNode(
18421 ARMISD::VMOVN, DL, VT,
18422 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18423 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18424 DAG.getConstant(1, DL, MVT::i32));
18425 if (isVMOVNTruncMask(Mask, VT, true))
18426 return DAG.getNode(
18427 ARMISD::VMOVN, DL, VT,
18428 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18429 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18430 DAG.getConstant(1, DL, MVT::i32));
18431 }
18432 }
18433
18434 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18435 // truncate to a buildvector to allow the generic optimisations to kick in.
18436 if (all_of(N->ops(), [](SDValue Op) {
18437 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18438 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18439 (Op.getOpcode() == ISD::BITCAST &&
18440 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18441 })) {
18442 SmallVector<SDValue, 8> Extracts;
18443 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18444 SDValue O = N->getOperand(Op);
18445 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18446 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18447 DAG.getConstant(i, DL, MVT::i32));
18448 Extracts.push_back(Ext);
18449 }
18450 }
18451 return DAG.getBuildVector(VT, DL, Extracts);
18452 }
18453
18454 // If we are late in the legalization process and nothing has optimised
18455 // the trunc to anything better, lower it to a stack store and reload,
18456 // performing the truncation whilst keeping the lanes in the correct order:
18457 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18458 if (!DCI.isAfterLegalizeDAG())
18459 return SDValue();
18460
18461 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18462 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18463 int NumIns = N->getNumOperands();
18464 assert((NumIns == 2 || NumIns == 4) &&
18465 "Expected 2 or 4 inputs to an MVETrunc");
18466 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18467 if (N->getNumOperands() == 4)
18468 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18469
      // Truncating-store each input to its slice of the 16-byte stack slot.
      // NOTE(review): the MachinePointerInfo construction used by the stores
      // (line 18475) is on an elided line.
18470 SmallVector<SDValue> Chains;
18471 for (int I = 0; I < NumIns; I++) {
18472 SDValue Ptr = DAG.getNode(
18473 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18474 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18476 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18477 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18478 Ptr, MPI, StoreVT, Align(4));
18479 Chains.push_back(Ch);
18480 }
18481
      // Reload the whole slot as one full-width vector.
18482 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18483 MachinePointerInfo MPI =
18485 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18486}
18487
18488// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18490 SelectionDAG &DAG) {
18491 SDValue N0 = N->getOperand(0);
      // NOTE(review): the LoadSDNode dyn_cast producing `LD` (line 18492) is
      // elided; only simple, unindexed, single-use loads are accepted.
18493 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18494 return SDValue();
18495
18496 EVT FromVT = LD->getMemoryVT();
18497 EVT ToVT = N->getValueType(0);
18498 if (!ToVT.isVector())
18499 return SDValue();
18500 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18501 EVT ToEltVT = ToVT.getVectorElementType();
18502 EVT FromEltVT = FromVT.getVectorElementType();
18503
      // Element count per split load, matching the legal MVE extending-load
      // shapes.
18504 unsigned NumElements = 0;
18505 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18506 NumElements = 4;
18507 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18508 NumElements = 8;
18509 assert(NumElements != 0);
18510
      // The load may already be an extending load, in which case its extension
      // kind must be compatible with the MVEEXT's signedness.
18511 ISD::LoadExtType NewExtType =
18512 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18513 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18514 LD->getExtensionType() != ISD::EXTLOAD &&
18515 LD->getExtensionType() != NewExtType)
18516 return SDValue();
18517
18518 LLVMContext &C = *DAG.getContext();
18519 SDLoc DL(LD);
18520 // Details about the old load
18521 SDValue Ch = LD->getChain();
18522 SDValue BasePtr = LD->getBasePtr();
18523 Align Alignment = LD->getBaseAlign();
18524 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18525 AAMDNodes AAInfo = LD->getAAInfo();
18526
18527 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18528 EVT NewFromVT = EVT::getVectorVT(
18529 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18530 EVT NewToVT = EVT::getVectorVT(
18531 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18532
      // Emit one extending load per chunk at increasing offsets.
      // NOTE(review): the `Loads`/`Chains` SmallVector declarations (lines
      // 18533-18534) are elided.
18535 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18536 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18537 SDValue NewPtr =
18538 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18539
18540 SDValue NewLoad =
18541 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18542 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18543 Alignment, MMOFlags, AAInfo);
18544 Loads.push_back(NewLoad);
18545 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18546 }
18547
      // Redirect the old load's chain users to the combined chain, then return
      // both halves as merged values (MVEEXT produces two results).
18548 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18549 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18550 return DAG.getMergeValues(Loads, DL);
18551}
18552
// Perform combines for MVEEXT. If it has not be optimized to anything better
// before lowering, it gets converted to stack store and extloads performing the
// extend whilst still keeping the same lane ordering.
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
  assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");

  // Type of the source lanes within a single result-sized register.
  EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
      *DAG.getContext());
  // Helper producing an in-register sign/zero extend of V matching the
  // MVESEXT/MVEZEXT opcode of N.
  auto Extend = [&](SDValue V) {
    SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
    return N->getOpcode() == ARMISD::MVESEXT
               ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
                             DAG.getValueType(ExtVT))
               : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
  };

  // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
  if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
    SDValue Ext = Extend(N->getOperand(0));
    // Both results of an extended splat are identical.
    return DAG.getMergeValues({Ext, Ext}, DL);
  }

  // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
  if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
    ArrayRef<int> Mask = SVN->getMask();
    assert(Mask.size() == 2 * VT.getVectorNumElements());
    assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
    unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
    SDValue Op0 = SVN->getOperand(0);
    SDValue Op1 = SVN->getOperand(1);

    // True if mask elements [Start, Start+NumElts) select every second lane
    // starting at Offset (undef lanes are ignored).
    auto CheckInregMask = [&](int Start, int Offset) {
      for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
        if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
          return false;
      return true;
    };
    // Default to "unchanged" (still the original MVEEXT results).
    SDValue V0 = SDValue(N, 0);
    SDValue V1 = SDValue(N, 1);
    // First result: even/odd lanes of Op0, or of Op1 (offset by Mask.size()).
    if (CheckInregMask(0, 0))
      V0 = Extend(Op0);
    else if (CheckInregMask(0, 1))
      V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
    else if (CheckInregMask(0, Mask.size()))
      V0 = Extend(Op1);
    else if (CheckInregMask(0, Mask.size() + 1))
      V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));

    // Second result: same four patterns, checked against the upper mask half.
    if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
      V1 = Extend(Op1);
    else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
      V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
    else if (CheckInregMask(VT.getVectorNumElements(), 0))
      V1 = Extend(Op0);
    else if (CheckInregMask(VT.getVectorNumElements(), 1))
      V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));

    // Only rewrite if at least one of the two results was matched.
    if (V0.getNode() != N || V1.getNode() != N)
      return DAG.getMergeValues({V0, V1}, DL);
  }

  // MVEEXT(load) -> extload, extload
  if (N->getOperand(0)->getOpcode() == ISD::LOAD)
      return L;

  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Lower to a stack store and reload:
  // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
  SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  int NumOuts = N->getNumValues();
  assert((NumOuts == 2 || NumOuts == 4) &&
         "Expected 2 or 4 outputs to an MVEEXT");
  // Memory type of each extending reload (half, or quarter, of the input).
  EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
      *DAG.getContext());
  if (N->getNumOperands() == 4)
    LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());

  // Store the whole input vector to the stack slot once...
  MachinePointerInfo MPI =
  SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
                               StackPtr, MPI, Align(4));

  // ...then reload each part with an extending load at the right offset.
  for (int I = 0; I < NumOuts; I++) {
    SDValue Ptr = DAG.getNode(
        ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
        DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
        DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
    SDValue Load = DAG.getExtLoad(
        N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
        VT, Chain, Ptr, MPI, LoadVT, Align(4));
    Loads.push_back(Load);
  }

  return DAG.getMergeValues(Loads, DL);
}
18659
                                             DAGCombinerInfo &DCI) const {
  // Central DAG-combine dispatcher for the ARM backend: route each node
  // opcode to its dedicated Perform*Combine helper. Cases that fall through
  // (or helpers that return an empty SDValue) leave the node unchanged.
  switch (N->getOpcode()) {
  default: break;
  case ISD::SELECT_CC:
  case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
  case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
  case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
  case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
  case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
  case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
  case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
  case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
  case ISD::BRCOND:
  case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
  case ARMISD::ADDC:
  case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
  case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
  case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
  case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
  case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
  case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
    return PerformExtractEltCombine(N, DCI, Subtarget);
  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return PerformVCVTCombine(N, DCI.DAG, Subtarget);
  case ISD::FADD:
    return PerformFADDCombine(N, DCI.DAG, Subtarget);
  case ISD::FMUL:
    return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
    return PerformIntrinsicCombine(N, DCI);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return PerformShiftCombine(N, DCI, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return PerformExtendCombine(N, DCI.DAG, Subtarget);
  case ISD::FP_EXTEND:
    return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
  case ISD::SMIN:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::UMAX:
    return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
  case ARMISD::CMOV:
    return PerformCMOVCombine(N, DCI.DAG);
  case ARMISD::BRCOND:
    return PerformBRCONDCombine(N, DCI.DAG);
  case ARMISD::CMPZ:
    return PerformCMPZCombine(N, DCI.DAG);
  case ARMISD::CSINC:
  case ARMISD::CSINV:
  case ARMISD::CSNEG:
    return PerformCSETCombine(N, DCI.DAG);
  case ISD::LOAD:
    return PerformLOADCombine(N, DCI, Subtarget);
  case ARMISD::VLD1DUP:
  case ARMISD::VLD2DUP:
  case ARMISD::VLD3DUP:
  case ARMISD::VLD4DUP:
    return PerformVLDCombine(N, DCI);
    return PerformARMBUILD_VECTORCombine(N, DCI);
  case ISD::BITCAST:
    return PerformBITCASTCombine(N, DCI, Subtarget);
  case ARMISD::PREDICATE_CAST:
    return PerformPREDICATE_CASTCombine(N, DCI);
  case ARMISD::VECTOR_REG_CAST:
    return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
  case ARMISD::MVETRUNC:
    return PerformMVETruncCombine(N, DCI);
  case ARMISD::MVESEXT:
  case ARMISD::MVEZEXT:
    return PerformMVEExtCombine(N, DCI);
  case ARMISD::VCMP:
    return PerformVCMPCombine(N, DCI.DAG, Subtarget);
  case ISD::VECREDUCE_ADD:
    return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
  // All MVE reduction-style nodes share one shuffle-removing combine.
  case ARMISD::VADDVs:
  case ARMISD::VADDVu:
  case ARMISD::VADDLVs:
  case ARMISD::VADDLVu:
  case ARMISD::VADDLVAs:
  case ARMISD::VADDLVAu:
  case ARMISD::VMLAVs:
  case ARMISD::VMLAVu:
  case ARMISD::VMLALVs:
  case ARMISD::VMLALVu:
  case ARMISD::VMLALVAs:
  case ARMISD::VMLALVAu:
    return PerformReduceShuffleCombine(N, DCI.DAG);
  case ARMISD::VMOVN:
    return PerformVMOVNCombine(N, DCI);
  case ARMISD::VQMOVNs:
  case ARMISD::VQMOVNu:
    return PerformVQMOVNCombine(N, DCI);
  case ARMISD::VQDMULH:
    return PerformVQDMULHCombine(N, DCI);
  case ARMISD::ASRL:
  case ARMISD::LSRL:
  case ARMISD::LSLL:
    return PerformLongShiftCombine(N, DCI.DAG);
  // The remaining cases use SimplifyDemandedBits to shrink operands of
  // nodes that only read the low/high 16 (or 8) bits of their inputs.
  case ARMISD::SMULWB: {
    unsigned BitWidth = N->getValueType(0).getSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
      return SDValue();
    break;
  }
  case ARMISD::SMULWT: {
    unsigned BitWidth = N->getValueType(0).getSizeInBits();
    APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
      return SDValue();
    break;
  }
  case ARMISD::SMLALBB:
  case ARMISD::QADD16b:
  case ARMISD::QSUB16b:
  case ARMISD::UQADD16b:
  case ARMISD::UQSUB16b: {
    unsigned BitWidth = N->getValueType(0).getSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
    if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
      return SDValue();
    break;
  }
  case ARMISD::SMLALBT: {
    unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
    APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
    unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
    APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
    if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
      return SDValue();
    break;
  }
  case ARMISD::SMLALTB: {
    unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
    APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
    unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
    APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
    if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
      return SDValue();
    break;
  }
  case ARMISD::SMLALTT: {
    unsigned BitWidth = N->getValueType(0).getSizeInBits();
    APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
    if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
      return SDValue();
    break;
  }
  case ARMISD::QADD8b:
  case ARMISD::QSUB8b:
  case ARMISD::UQADD8b:
  case ARMISD::UQSUB8b: {
    unsigned BitWidth = N->getValueType(0).getSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
    if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
      return SDValue();
    break;
  }
  case ARMISD::VBSP:
    // Selecting between two identical values is a no-op.
    if (N->getOperand(1) == N->getOperand(2))
      return N->getOperand(1);
    return SDValue();
    // NEON/MVE memory intrinsics: operand 1 holds the intrinsic ID.
    switch (N->getConstantOperandVal(1)) {
    case Intrinsic::arm_neon_vld1:
    case Intrinsic::arm_neon_vld1x2:
    case Intrinsic::arm_neon_vld1x3:
    case Intrinsic::arm_neon_vld1x4:
    case Intrinsic::arm_neon_vld2:
    case Intrinsic::arm_neon_vld3:
    case Intrinsic::arm_neon_vld4:
    case Intrinsic::arm_neon_vld2lane:
    case Intrinsic::arm_neon_vld3lane:
    case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vld2dup:
    case Intrinsic::arm_neon_vld3dup:
    case Intrinsic::arm_neon_vld4dup:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst1x2:
    case Intrinsic::arm_neon_vst1x3:
    case Intrinsic::arm_neon_vst1x4:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
    case Intrinsic::arm_neon_vst4:
    case Intrinsic::arm_neon_vst2lane:
    case Intrinsic::arm_neon_vst3lane:
    case Intrinsic::arm_neon_vst4lane:
      return PerformVLDCombine(N, DCI);
    case Intrinsic::arm_mve_vld2q:
    case Intrinsic::arm_mve_vld4q:
    case Intrinsic::arm_mve_vst2q:
    case Intrinsic::arm_mve_vst4q:
      return PerformMVEVLDCombine(N, DCI);
    default: break;
    }
    break;
  }
  return SDValue();
}
18885
                                                          EVT VT) const {
  // Only f32 loads/stores benefit from being rewritten as integer ops
  // (the value can live in a GPR instead of an FP register).
  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
18890
                                                       Align Alignment,
                                                       unsigned *Fast) const {
  // Depends what it gets converted into if the type is weird.
  if (!VT.isSimple())
    return false;

  // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
  auto Ty = VT.getSimpleVT().SimpleTy;

  if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
    // Unaligned access can use (for example) LRDB, LRDH, LDR
    if (AllowsUnaligned) {
      if (Fast)
        // Only treat it as fast on v7+; earlier cores allow it but slowly.
        *Fast = Subtarget->hasV7Ops();
      return true;
    }
  }

  if (Ty == MVT::f64 || Ty == MVT::v2f64) {
    // For any little-endian targets with neon, we can support unaligned ld/st
    // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
    // A big-endian target may also explicitly support unaligned accesses
    if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
      if (Fast)
        *Fast = 1;
      return true;
    }
  }

  // Everything below is MVE-specific.
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  // These are for predicates
  if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
       Ty == MVT::v2i1)) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  // These are for truncated stores/narrowing loads. They are fine so long as
  // the alignment is at least the size of the item being loaded
  if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
      Alignment >= VT.getScalarSizeInBits() / 8) {
    if (Fast)
      *Fast = true;
    return true;
  }

  // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
  // VSTRW.U32 all store the vector register in exactly the same format, and
  // differ only in the range of their immediate offset field and the required
  // alignment. So there is always a store that can be used, regardless of
  // actual type.
  //
  // For big endian, that is not the case. But can still emit a (VSTRB.U8;
  // VREV64.8) pair and get the same effect. This will likely be better than
  // aligning the vector through the stack.
  if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
      Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
      Ty == MVT::v2f64) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  return false;
}
18962
    LLVMContext &Context, const MemOp &Op,
    const AttributeList &FuncAttributes) const {
  // See if we can use NEON instructions for this...
  if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
      !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    unsigned Fast;
    // Prefer 16-byte vector ops when the op is big enough and either aligned
    // or cheap to do unaligned; fall back to 8-byte f64 ops likewise.
    if (Op.size() >= 16 &&
        (Op.isAligned(Align(16)) ||
         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
           Fast))) {
      return MVT::v2f64;
    } else if (Op.size() >= 8 &&
               (Op.isAligned(Align(8)) ||
                    MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
                Fast))) {
      return MVT::f64;
    }
  }

  // Let the target-independent logic figure it out.
  return MVT::Other;
}
18988
18989// 64-bit integers are split into their high and low parts and held in two
18990// different registers, so the trunc is free since the low register can just
18991// be used.
18992bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
18993 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
18994 return false;
18995 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
18996 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
18997 return (SrcBits == 64 && DestBits == 32);
18998}
18999
  // EVT overload: same rule as the Type* overload — only a scalar
  // integer i64 -> i32 truncation is free (take the low register).
  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
      !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}
19008
  // Zero-extension is only free when the value comes from a load.
  if (Val.getOpcode() != ISD::LOAD)
    return false;

  EVT VT1 = Val.getValueType();
  if (!VT1.isSimple() || !VT1.isInteger() ||
      !VT2.isSimple() || !VT2.isInteger())
    return false;

  switch (VT1.getSimpleVT().SimpleTy) {
  default: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
    return true;
  }

  return false;
}
19029
  if (!VT.isSimple())
    return false;

  // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
  // negate values directly (fneg is free). So, we don't want to let the DAG
  // combiner rewrite fneg into xors and some other instructions. For f16 and
  // FullFP16 argument passing, some bitcast nodes may be introduced,
  // triggering this DAG combine rewrite, so we are avoiding that with this.
  switch (VT.getSimpleVT().SimpleTy) {
  default: break;
  case MVT::f16:
    // fneg on f16 is only free when the FullFP16 instructions exist.
    return Subtarget->hasFullFP16();
  }

  return false;
}
19047
  // Only worthwhile for MVE targets.
  if (!Subtarget->hasMVEIntegerOps())
    return nullptr;
  Type *SVIType = SVI->getType();
  Type *ScalarType = SVIType->getScalarType();

  // Map an FP splat element type to the same-width integer type;
  // return nullptr when no conversion should happen.
  if (ScalarType->isFloatTy())
    return Type::getInt32Ty(SVIType->getContext());
  if (ScalarType->isHalfTy())
    return Type::getInt16Ty(SVIType->getContext());
  return nullptr;
}
19060
  EVT VT = ExtVal.getValueType();

  if (!isTypeLegal(VT))
    return false;

  // Expanding masked loads cannot be folded into an extending load.
  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
    if (Ld->isExpandingLoad())
      return false;
  }

  // MVE has native widening loads, so the extload is always desirable.
  if (Subtarget->hasMVEIntegerOps())
    return true;

  // Don't create a loadext if we can fold the extension into a wide/long
  // instruction.
  // If there's more than one user instruction, the loadext is desirable no
  // matter what. There can be two uses by the same instruction.
  if (ExtVal->use_empty() ||
      !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))
    return true;

  // A single user that is an add/sub/shift can absorb the extend itself.
  SDNode *U = *ExtVal->user_begin();
  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
       U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
    return false;

  return true;
}
19090
  // Only integer-to-integer truncations are considered.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;

  // The source type must be a legal register type for the truncate to be
  // implicit.
  if (!isTypeLegal(EVT::getEVT(Ty1)))
    return false;

  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");

  // Assuming the caller doesn't have a zeroext or signext return parameter,
  // truncation all the way down to i1 is valid.
  return true;
}
19104
19105/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19106/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19107/// expanded to FMAs when this method returns true, otherwise fmuladd is
19108/// expanded to fmul + fadd.
19109///
19110/// ARM supports both fused and unfused multiply-add operations; we already
19111/// lower a pair of fmul and fadd to the latter so it's not clear that there
19112/// would be a gain or that the gain would be worthwhile enough to risk
19113/// correctness bugs.
19114///
19115/// For MVE, we set this to true as it helps simplify the need for some
19116/// patterns (and we don't have the non-fused floating point instruction).
19117bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19118 EVT VT) const {
19119 if (Subtarget->useSoftFloat())
19120 return false;
19121
19122 if (!VT.isSimple())
19123 return false;
19124
19125 switch (VT.getSimpleVT().SimpleTy) {
19126 case MVT::v4f32:
19127 case MVT::v8f16:
19128 return Subtarget->hasMVEFloatOps();
19129 case MVT::f16:
19130 return Subtarget->useFPVFMx16();
19131 case MVT::f32:
19132 return Subtarget->useFPVFMx();
19133 case MVT::f64:
19134 return Subtarget->useFPVFMx64();
19135 default:
19136 break;
19137 }
19138
19139 return false;
19140}
19141
19142static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19143 if (V < 0)
19144 return false;
19145
19146 unsigned Scale = 1;
19147 switch (VT.getSimpleVT().SimpleTy) {
19148 case MVT::i1:
19149 case MVT::i8:
19150 // Scale == 1;
19151 break;
19152 case MVT::i16:
19153 // Scale == 2;
19154 Scale = 2;
19155 break;
19156 default:
19157 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19158 // Scale == 4;
19159 Scale = 4;
19160 break;
19161 }
19162
19163 if ((V & (Scale - 1)) != 0)
19164 return false;
19165 return isUInt<5>(V / Scale);
19166}
19167
19168static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19169 const ARMSubtarget *Subtarget) {
19170 if (!VT.isInteger() && !VT.isFloatingPoint())
19171 return false;
19172 if (VT.isVector() && Subtarget->hasNEON())
19173 return false;
19174 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19175 !Subtarget->hasMVEFloatOps())
19176 return false;
19177
19178 bool IsNeg = false;
19179 if (V < 0) {
19180 IsNeg = true;
19181 V = -V;
19182 }
19183
19184 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19185
19186 // MVE: size * imm7
19187 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19188 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19189 case MVT::i32:
19190 case MVT::f32:
19191 return isShiftedUInt<7,2>(V);
19192 case MVT::i16:
19193 case MVT::f16:
19194 return isShiftedUInt<7,1>(V);
19195 case MVT::i8:
19196 return isUInt<7>(V);
19197 default:
19198 return false;
19199 }
19200 }
19201
19202 // half VLDR: 2 * imm8
19203 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19204 return isShiftedUInt<8, 1>(V);
19205 // VLDR and LDRD: 4 * imm8
19206 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19207 return isShiftedUInt<8, 2>(V);
19208
19209 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19210 // + imm12 or - imm8
19211 if (IsNeg)
19212 return isUInt<8>(V);
19213 return isUInt<12>(V);
19214 }
19215
19216 return false;
19217}
19218
19219/// isLegalAddressImmediate - Return true if the integer value can be used
19220/// as the offset of the target addressing mode for load / store of the
19221/// given type.
19222static bool isLegalAddressImmediate(int64_t V, EVT VT,
19223 const ARMSubtarget *Subtarget) {
19224 if (V == 0)
19225 return true;
19226
19227 if (!VT.isSimple())
19228 return false;
19229
19230 if (Subtarget->isThumb1Only())
19231 return isLegalT1AddressImmediate(V, VT);
19232 else if (Subtarget->isThumb2())
19233 return isLegalT2AddressImmediate(V, VT, Subtarget);
19234
19235 // ARM mode.
19236 if (V < 0)
19237 V = - V;
19238 switch (VT.getSimpleVT().SimpleTy) {
19239 default: return false;
19240 case MVT::i1:
19241 case MVT::i8:
19242 case MVT::i32:
19243 // +- imm12
19244 return isUInt<12>(V);
19245 case MVT::i16:
19246 // +- imm8
19247 return isUInt<8>(V);
19248 case MVT::f32:
19249 case MVT::f64:
19250 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19251 return false;
19252 return isShiftedUInt<8, 2>(V);
19253 }
19254}
19255
                                                        EVT VT) const {
  int Scale = AM.Scale;
  // Negative scales are never legal here.
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // FIXME: What are we trying to model here? ldrd doesn't have an r + r
    // version in Thumb mode.
    // r + r
    if (Scale == 1)
      return true;
    // r * 2 (this can be lowered to r + r).
    if (!AM.HasBaseReg && Scale == 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because arm allows folding a scale into many arithmetic
    // operations. This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}
19293
                                                        EVT VT) const {
  const int Scale = AM.Scale;

  // Negative scales are not supported in Thumb1.
  if (Scale < 0)
    return false;

  // Thumb1 addressing modes do not support register scaling excepting the
  // following cases:
  // 1. Scale == 1 means no scaling.
  // 2. Scale == 2 this can be lowered to r + r if there is no base register.
  return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
}
19308
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS, Instruction *I) const {
  EVT VT = getValueType(DL, Ty, true);
  // The immediate offset must be encodable for this type/mode.
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // no scale reg, must be "r+i" or "r", or "i".
    break;
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    // Thumb modes have their own scaled-register rules.
    if (Subtarget->isThumb1Only())
      return isLegalT1ScaledAddressingMode(AM, VT);

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    // ARM mode.
    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r +/- r
      if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
        return true;
      // r * 2 (this can be lowered to r + r).
      if (!AM.HasBaseReg && Scale == 2)
        return true;
      return false;

    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because arm allows folding a scale into many arithmetic
      // operations. This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
  }
  return true;
}
19372
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
  // Thumb2 and ARM modes can use cmn for negative immediates.
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
           ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
           ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
  // Thumb1 doesn't have cmn, and only 8-bit immediates.
  return Imm >= 0 && Imm <= 255;
}
19388
/// isLegalAddImmediate - Return true if the specified immediate is a legal add
/// *or sub* immediate, that is the target has add or sub instructions which can
/// add a register with the immediate without having to materialize the
/// immediate into a register.
  // Same encoding for add/sub, just flip the sign.
  uint64_t AbsImm = AbsoluteValue(Imm);
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal(AbsImm) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal(AbsImm) != -1;
  // Thumb1 only has 8-bit unsigned immediate.
  return AbsImm <= 255;
}
19403
// Return false to prevent folding
// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
// if the folding leads to worse code.
                                                    SDValue ConstNode) const {
  // Let the DAGCombiner decide for vector types and large types.
  const EVT VT = AddNode.getValueType();
  if (VT.isVector() || VT.getScalarSizeInBits() > 32)
    return true;

  // It is worse if c0 is legal add immediate, while c1*c0 is not
  // and has to be composed by at least two instructions.
  const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
  const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
  const int64_t C0 = C0Node->getSExtValue();
  APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
    return true;
  // More than one instruction to materialize c0*c1 makes the fold a loss.
  if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}
19428
/// getARMIndexedAddressParts - Match an ADD/SUB pointer computation for an
/// ARM-mode pre/post-indexed load/store. On success fills in Base/Offset and
/// reports the writeback direction in isInc.
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  // Only plain pointer add/sub can be folded into a writeback form.
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
      // AM3 immediates are 8-bit; a negative offset becomes a decrement with
      // a positive immediate.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
        return true;
      }
    }
    // Otherwise use the register-offset form directly.
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
      // AM2 allows a 12-bit immediate offset.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      // Prefer placing a shiftable operand in the offset position so AM2's
      // register-shifted-offset form can be used.
      ARM_AM::ShiftOpc ShOpcVal=
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
  return false;
}
19487
/// getT2IndexedAddressParts - Match an ADD/SUB pointer computation for a
/// Thumb-2 pre/post-indexed load/store; T2 writeback offsets are 8-bit
/// immediates (positive or negated, but never zero for the increment form).
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      // Negative offset is encoded as a positive immediate with decrement.
      Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
      return true;
    }
  }

  return false;
}
19512
/// Match an ADD/SUB pointer computation for an MVE pre/post-indexed vector
/// load/store. The offset must be an immediate that fits the 7-bit scaled
/// range of the instruction chosen from the element size and alignment.
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
                                      bool isSEXTLoad, bool IsMasked, bool isLE,
                                      bool &isInc, SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;
  if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
    return false;

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = isLE && !IsMasked;

    int RHSC = (int)RHS->getZExtValue();

  // Checks whether the constant offset fits the +/-(Limit*Scale) range for an
  // element size of Scale bytes, and fills in Offset/isInc on success.
  auto IsInRange = [&](int RHSC, int Limit, int Scale) {
    if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
      return true;
    }
    return false;
  };

  // Try to find a matching instruction based on s/zext, Alignment, Offset and
  // (in BE/masked) type.
  Base = Ptr->getOperand(0);
  if (VT == MVT::v4i16) {
    if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
      return true;
  } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
    if (IsInRange(RHSC, 0x80, 1))
      return true;
  } else if (Alignment >= 4 &&
             (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
             IsInRange(RHSC, 0x80, 4))
    return true;
  else if (Alignment >= 2 &&
           (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
           IsInRange(RHSC, 0x80, 2))
    return true;
  else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
    return true;
  return false;
}
19565
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool
                                             SDValue &Offset,
                                             SelectionDAG &DAG) const {
  // Thumb1 has no pre-indexed load/store forms.
  if (Subtarget->isThumb1Only())
    return false;

  // Pull the address, memory type, alignment and extension kind out of the
  // four supported memory node flavors (plain/masked load/store).
  EVT VT;
  SDValue Ptr;
  Align Alignment;
  bool isSEXTLoad = false;
  bool IsMasked = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
    IsMasked = true;
    // NOTE(review): the masked-store branch header appears truncated in this
    // view; the assignments below belong to it.
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
    IsMasked = true;
  } else
    return false;

  // Dispatch to the matcher for the relevant instruction set.
  bool isInc;
  bool isLegal = false;
  if (VT.isVector())
    isLegal = Subtarget->hasMVEIntegerOps() &&
                  Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
                  Subtarget->isLittle(), Base, Offset, isInc, DAG);
  else {
    if (Subtarget->isThumb2())
      isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                         Offset, isInc, DAG);
    else
      isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                          Offset, isInc, DAG);
  }
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}
19626
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   SelectionDAG &DAG) const {
  // Pull the address, memory type, alignment and extension kind out of the
  // four supported memory node flavors (plain/masked load/store).
  EVT VT;
  SDValue Ptr;
  Align Alignment;
  bool isSEXTLoad = false, isNonExt;
  bool IsMasked = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
    Alignment = LD->getAlign();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
    isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
    Alignment = ST->getAlign();
    isNonExt = !ST->isTruncatingStore();
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
    Alignment = LD->getAlign();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
    isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
    IsMasked = true;
    // NOTE(review): the masked-store branch header appears truncated in this
    // view; the assignments below belong to it.
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
    Alignment = ST->getAlign();
    isNonExt = !ST->isTruncatingStore();
    IsMasked = true;
  } else
    return false;

  if (Subtarget->isThumb1Only()) {
    // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
    // must be non-extending/truncating, i32, with an offset of 4.
    assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
    if (Op->getOpcode() != ISD::ADD || !isNonExt)
      return false;
    auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!RHS || RHS->getZExtValue() != 4)
      return false;
    if (Alignment < Align(4))
      return false;

    Offset = Op->getOperand(1);
    Base = Op->getOperand(0);
    AM = ISD::POST_INC;
    return true;
  }

  // Dispatch to the matcher for the relevant instruction set.
  bool isInc;
  bool isLegal = false;
  if (VT.isVector())
    isLegal = Subtarget->hasMVEIntegerOps() &&
              getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
                                        Subtarget->isLittle(), Base, Offset,
                                        isInc, DAG);
  else {
    if (Subtarget->isThumb2())
      isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                         isInc, DAG);
    else
      isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                          isInc, DAG);
  }
  if (!isLegal)
    return false;

  if (Ptr != Base) {
    // Swap base ptr and offset to catch more post-index load / store when
    // it's legal. In Thumb2 mode, offset must be an immediate.
    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
        !Subtarget->isThumb2())

    // Post-indexed load / store update the base pointer.
    if (Ptr != Base)
      return false;
  }

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}
19718
/// Compute known-zero/known-one bits for ARM-specific DAG nodes so the
/// generic combiner can reason about them. Conservative: any opcode not
/// handled below leaves Known fully unknown.
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::ADDC:
  case ARMISD::ADDE:
  case ARMISD::SUBC:
  case ARMISD::SUBE:
    // Special cases when we convert a carry to a boolean.
    if (Op.getResNo() == 0) {
      SDValue LHS = Op.getOperand(0);
      SDValue RHS = Op.getOperand(1);
      // (ADDE 0, 0, C) will give us a single bit.
      if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
          isNullConstant(RHS)) {
        // NOTE(review): the statement marking all bits above bit 0 as known
        // zero appears truncated in this view.
        return;
      }
    }
    break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
    if (Known.isUnknown())
      return;

    KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
    // Only bits agreed on by both possible results are known.
    Known = Known.intersectWith(KnownRHS);
    return;
  }
    // NOTE(review): the INTRINSIC_W_CHAIN case label appears truncated in
    // this view; the intrinsic dispatch below belongs to it.
    Intrinsic::ID IntID =
        static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
    switch (IntID) {
    default: return;
    case Intrinsic::arm_ldaex:
    case Intrinsic::arm_ldrex: {
      // ldrex/ldaex zero-extend the loaded memory value to 32 bits, so every
      // bit above the memory width is known zero.
      EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
      unsigned MemBits = VT.getScalarSizeInBits();
      Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
      return;
    }
    }
  }
  case ARMISD::BFI: {
    // Conservatively, we can recurse down the first operand
    // and just mask out all affected bits.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);

    // The operand to BFI is already a mask suitable for removing the bits it
    // sets.
    const APInt &Mask = Op.getConstantOperandAPInt(2);
    Known.Zero &= Mask;
    Known.One &= Mask;
    return;
  }
  case ARMISD::VGETLANEs:
  case ARMISD::VGETLANEu: {
    const SDValue &SrcSV = Op.getOperand(0);
    EVT VecVT = SrcSV.getValueType();
    assert(VecVT.isVector() && "VGETLANE expected a vector type");
    const unsigned NumSrcElts = VecVT.getVectorNumElements();
    ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
    assert(Pos->getAPIntValue().ult(NumSrcElts) &&
           "VGETLANE index out of bounds");
    unsigned Idx = Pos->getZExtValue();
    // Only the extracted lane matters for the result's known bits.
    APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
    Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);

    EVT VT = Op.getValueType();
    const unsigned DstSz = VT.getScalarSizeInBits();
    const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
    (void)SrcSz;
    assert(SrcSz == Known.getBitWidth());
    assert(DstSz > SrcSz);
    // Extend per the lane-extract's signedness.
    if (Op.getOpcode() == ARMISD::VGETLANEs)
      Known = Known.sext(DstSz);
    else {
      Known = Known.zext(DstSz);
    }
    assert(DstSz == Known.getBitWidth());
    break;
  }
  case ARMISD::VMOVrh: {
    // VMOVrh moves a half-precision value into the low 16 bits of a GPR and
    // zeroes the top half.
    KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
    assert(KnownOp.getBitWidth() == 16);
    Known = KnownOp.zext(32);
    break;
  }
  case ARMISD::CSINC:
  case ARMISD::CSINV:
  case ARMISD::CSNEG: {
    KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
    KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);

    // The result is either:
    // CSINC: KnownOp0 or KnownOp1 + 1
    // CSINV: KnownOp0 or ~KnownOp1
    // CSNEG: KnownOp0 or KnownOp1 * -1
    if (Op.getOpcode() == ARMISD::CSINC)
      KnownOp1 =
          KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
    else if (Op.getOpcode() == ARMISD::CSINV)
      std::swap(KnownOp1.Zero, KnownOp1.One);
    else if (Op.getOpcode() == ARMISD::CSNEG)
      // NOTE(review): the constant (-1) argument of this multiply appears
      // truncated in this view.
      KnownOp1 = KnownBits::mul(KnownOp1,

    Known = KnownOp0.intersectWith(KnownOp1);
    break;
  }
  case ARMISD::VORRIMM:
  case ARMISD::VBICIMM: {
    unsigned Encoded = Op.getConstantOperandVal(1);
    unsigned DecEltBits = 0;
    uint64_t DecodedVal = ARM_AM::decodeVMOVModImm(Encoded, DecEltBits);

    unsigned EltBits = Op.getScalarValueSizeInBits();
    if (EltBits != DecEltBits) {
      // Be conservative: only update Known when EltBits == DecEltBits.
      // This is believed to always be true for VORRIMM/VBICIMM today, but if
      // that changes in the future, doing nothing here is safer than risking
      // subtle bugs.
      break;
    }

    KnownBits KnownLHS = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    bool IsVORR = Op.getOpcode() == ARMISD::VORRIMM;
    APInt Imm(DecEltBits, DecodedVal);

    // OR with the immediate forces its set bits to one; BIC (AND-NOT) forces
    // them to zero.
    Known.One = IsVORR ? (KnownLHS.One | Imm) : (KnownLHS.One & ~Imm);
    Known.Zero = IsVORR ? (KnownLHS.Zero & ~Imm) : (KnownLHS.Zero | Imm);
    break;
  }
  }
}
19860
/// Try to replace the constant mask of an i32 AND with a cheaper one that is
/// equivalent on the demanded bits (uxtb/uxth masks, small positive masks, or
/// masks whose complement is a small negative value).
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay optimization, so we don't have to deal with illegal types, or block
  // optimizations.
  if (!TLO.LegalOps)
    return false;

  // Only optimize AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  EVT VT = Op.getValueType();

  // Ignore vectors.
  if (VT.isVector())
    return false;

  assert(VT == MVT::i32 && "Unexpected integer type");

  // Make sure the RHS really is a constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  unsigned Mask = C->getZExtValue();

  // Any mask between ShrunkMask (only demanded bits kept) and ExpandedMask
  // (all non-demanded bits set) is behaviorally equivalent.
  unsigned Demanded = DemandedBits.getZExtValue();
  unsigned ShrunkMask = Mask & Demanded;
  unsigned ExpandedMask = Mask | ~Demanded;

  // If the mask is all zeros, let the target-independent code replace the
  // result with zero.
  if (ShrunkMask == 0)
    return false;

  // If the mask is all ones, erase the AND. (Currently, the target-independent
  // code won't do this, so we have to do it explicitly to avoid an infinite
  // loop in obscure cases.)
  if (ExpandedMask == ~0U)
    return TLO.CombineTo(Op, Op.getOperand(0));

  // A candidate mask is legal iff it lies between ShrunkMask and ExpandedMask.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
  };
  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // Prefer uxtb mask.
  if (IsLegalMask(0xFF))
    return UseMask(0xFF);

  // Prefer uxth mask.
  if (IsLegalMask(0xFFFF))
    return UseMask(0xFFFF);

  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
  if (ShrunkMask < 256)
    return UseMask(ShrunkMask);

  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
    return UseMask(ExpandedMask);

  // Potential improvements:
  //
  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
  // We could try to prefer Thumb1 immediates which can be lowered to a
  // two-instruction sequence.
  // We could try to recognize more legal ARM/Thumb2 immediates here.

  return false;
}
19942
/// Target hook for demanded-bits simplification of ARM-specific nodes; the
/// remaining opcodes fall through to the generic TargetLowering handling.
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  unsigned Opc = Op.getOpcode();

  switch (Opc) {
  case ARMISD::ASRL:
  case ARMISD::LSRL: {
    // If this is result 0 and the other result is unused, see if the demand
    // bits allow us to shrink this long shift into a standard small shift in
    // the opposite direction.
    if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
        isa<ConstantSDNode>(Op->getOperand(2))) {
      unsigned ShAmt = Op->getConstantOperandVal(2);
      if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
                                                        << (32 - ShAmt)))
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(
                    ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
                    TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
    }
    break;
  }
  case ARMISD::VBICIMM: {
    // If none of the bits cleared by the BIC immediate are demanded, the BIC
    // is a no-op on the demanded bits and can be dropped.
    SDValue Op0 = Op.getOperand(0);
    unsigned ModImm = Op.getConstantOperandVal(1);
    unsigned EltBits = 0;
    uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
    if ((OriginalDemandedBits & Mask) == 0)
      return TLO.CombineTo(Op, Op0);
  }
  }

  // NOTE(review): the call to the TargetLowering base implementation appears
  // truncated in this view; the argument list below belongs to it.
      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
19980
19981//===----------------------------------------------------------------------===//
19982// ARM Inline Assembly Support
19983//===----------------------------------------------------------------------===//
19984
19985const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
19986 // At this point, we have to lower this constraint to something else, so we
19987 // lower it to an "r" or "w". However, by doing this we will force the result
19988 // to be in register, while the X constraint is much more permissive.
19989 //
19990 // Although we are correct (we are free to emit anything, without
19991 // constraints), we might break use cases that would expect us to be more
19992 // efficient and emit something else.
19993 if (!Subtarget->hasVFP2Base())
19994 return "r";
19995 if (ConstraintVT.isFloatingPoint())
19996 return "w";
19997 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
19998 (ConstraintVT.getSizeInBits() == 64 ||
19999 ConstraintVT.getSizeInBits() == 128))
20000 return "w";
20001
20002 return "r";
20003}
20004
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
  unsigned S = Constraint.size();
  if (S == 1) {
    // Single-letter GCC ARM constraints.
    switch (Constraint[0]) {
    default: break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    case 'h': return C_RegisterClass;
    case 'x': return C_RegisterClass;
    case 't': return C_RegisterClass;
    case 'j': return C_Immediate; // Constant for movw.
    // An address with a single base register. Due to the way we
    // currently handle addresses it is the same as an 'r' memory constraint.
    case 'Q': return C_Memory;
    }
  } else if (S == 2) {
    // Two-letter constraints: 'Te'/'To' register classes and 'U*' addresses.
    switch (Constraint[0]) {
    default: break;
    case 'T': return C_RegisterClass;
    // All 'U+' constraints are addresses.
    case 'U': return C_Memory;
    }
  }
  // Anything else is handled generically.
  return TargetLowering::getConstraintType(Constraint);
}
20033
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
    AsmOperandInfo &info, const char *constraint) const {
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    // Fall back to the generic weighting for unrecognized letters.
    break;
  case 'l':
    // 'l': low registers; a stronger match on Thumb where only r0-r7 qualify.
    if (type->isIntegerTy()) {
      if (Subtarget->isThumb())
        weight = CW_SpecificReg;
      else
        weight = CW_Register;
    }
    break;
  case 'w':
    // 'w': VFP/NEON register; only meaningful for floating-point operands.
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;
  }
  return weight;
}
20067
20068static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT) {
20069 if (PR == 0 || VT == MVT::Other)
20070 return false;
20071 if (ARM::SPRRegClass.contains(PR))
20072 return VT != MVT::f32 && VT != MVT::f16 && VT != MVT::i32;
20073 if (ARM::DPRRegClass.contains(PR))
20074 return VT != MVT::f64 && !VT.is64BitVector();
20075 return false;
20076}
20077
20078using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20079
/// Map an inline-asm register constraint plus operand type to a concrete
/// register class (or, for "{cc}", a specific register). Returns {0,nullptr}
/// when a named register is incompatible with the operand type.
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  switch (Constraint.size()) {
  case 1:
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l': // Low regs or general regs.
      if (Subtarget->isThumb())
        return RCPair(0U, &ARM::tGPRRegClass);
      return RCPair(0U, &ARM::GPRRegClass);
    case 'h': // High regs or no regs.
      if (Subtarget->isThumb())
        return RCPair(0U, &ARM::hGPRRegClass);
      break;
    case 'r':
      // Thumb1 only has the low registers generally available.
      if (Subtarget->isThumb1Only())
        return RCPair(0U, &ARM::tGPRRegClass);
      return RCPair(0U, &ARM::GPRRegClass);
    case 'w':
      // VFP/NEON register sized by the operand type: SPR/DPR/QPR.
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
        return RCPair(0U, &ARM::SPRRegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPRRegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPRRegClass);
      break;
    case 'x':
      // Like 'w' but restricted to the lower half of the register file.
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
        return RCPair(0U, &ARM::SPR_8RegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPR_8RegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPR_8RegClass);
      break;
    case 't':
      // VFP2-compatible registers; also accepts i32 in an SPR.
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
        return RCPair(0U, &ARM::SPRRegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPR_VFP2RegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPR_VFP2RegClass);
      break;
    }
    break;

  case 2:
    if (Constraint[0] == 'T') {
      // 'Te'/'To': even/odd-numbered low GPRs.
      switch (Constraint[1]) {
      default:
        break;
      case 'e':
        return RCPair(0U, &ARM::tGPREvenRegClass);
      case 'o':
        return RCPair(0U, &ARM::tGPROddRegClass);
      }
    }
    break;

  default:
    break;
  }

  if (StringRef("{cc}").equals_insensitive(Constraint))
    return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);

  // Defer to the generic lookup, then reject type/register mismatches.
  auto RCP = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
  if (isIncompatibleReg(RCP.first, VT))
    return {0, nullptr};
  return RCP;
}
20156
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Currently only support length 1 constraints.
  if (Constraint.size() != 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'j':
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    // All of these constraints only accept immediate operands.
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits. Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    // Each letter validates a different immediate range; 'break' accepts the
    // value (falling through to build the target constant), 'return' rejects.
    switch (ConstraintLetter) {
      case 'j':
        // Constant suitable for movw, must be between 0 and
        // 65535.
        if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
          if (CVal >= 0 && CVal <= 65535)
            break;
        return;
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. This is suitable
          // for use as the immediate offset field in LDR and STR instructions
          // such as LDR r0,[r1,#offset].
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb1Only()) {
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32. This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb1Only()) {
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  // Unhandled letters are delegated to the generic implementation.
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
20324
20325static RTLIB::Libcall getDivRemLibcall(
20326 const SDNode *N, MVT::SimpleValueType SVT) {
20327 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20328 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20329 "Unhandled Opcode in getDivRemLibcall");
20330 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20331 N->getOpcode() == ISD::SREM;
20332 RTLIB::Libcall LC;
20333 switch (SVT) {
20334 default: llvm_unreachable("Unexpected request for libcall!");
20335 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20336 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20337 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20338 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20339 }
20340 return LC;
20341}
20342
/// Build the libcall argument list for a div/rem runtime call: each operand
/// becomes an entry with sign/zero-extension flags matching the operation's
/// signedness.
    const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
          N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
         "Unhandled Opcode in getDivRemArgList");
  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
                  N->getOpcode() == ISD::SREM;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    EVT ArgVT = N->getOperand(i).getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*Context);
    TargetLowering::ArgListEntry Entry(N->getOperand(i), ArgTy);
    Entry.IsSExt = isSigned;
    Entry.IsZExt = !isSigned;
    Args.push_back(Entry);
  }
  // Windows divmod runtime expects (divisor, dividend) argument order.
  if (Subtarget->isTargetWindows() && Args.size() >= 2)
    std::swap(Args[0], Args[1]);
  return Args;
}
20363
// Lower ISD::SDIVREM / ISD::UDIVREM.  Strategy, in order of preference:
//   1. i64 divrem by a constant divisor: expand via 32-bit halves
//      (expandDIVREMByConstant) and rebuild the two i64 results with
//      BUILD_PAIR.
//   2. Hardware divide available and the type is i32: emit div, then
//      rem = a - (a / b) * b  (folded to SDIV/UDIV + MLS later).
//   3. Otherwise: call the RTABI divmod helper chosen by getDivRemLibcall,
//      which returns {div, rem} as a two-element struct.
// NOTE(review): this extract omits several original lines (20377 declaring
// 'Result', 20413-14/20418-19 building 'Args'/'Callee', and 20429 finishing
// the CLI setter chain); comments below describe only the visible code.
20364SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  // Only ABIs whose divmod helper returns both results in registers use this
  // path.
20365  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20366          Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20367          Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&
20368         "Register-based DivRem lowering only");
20369  unsigned Opcode = Op->getOpcode();
20370  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20371         "Invalid opcode for Div/Rem lowering");
20372  bool isSigned = (Opcode == ISD::SDIVREM);
20373  EVT VT = Op->getValueType(0);
20374  SDLoc dl(Op);
20375
  // Case 1: i64 divrem by constant, expanded in terms of i32 operations.
  // expandDIVREMByConstant fills 'Result' with {DivLo, DivHi, RemLo, RemHi}.
20376  if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20378    if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20379      SDValue Res0 =
20380          DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20381      SDValue Res1 =
20382          DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20383      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20384                         {Res0, Res1});
20385    }
20386  }
20387
20388  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20389
20390  // If the target has hardware divide, use divide + multiply + subtract:
20391  //     div = a / b
20392  //     rem = a - b * div
20393  //     return {div, rem}
20394  // This should be lowered into UDIV/SDIV + MLS later on.
20395  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20396                                        : Subtarget->hasDivideInARMMode();
20397  if (hasDivide && Op->getValueType(0).isSimple() &&
20398      Op->getSimpleValueType(0) == MVT::i32) {
20399    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20400    const SDValue Dividend = Op->getOperand(0);
20401    const SDValue Divisor = Op->getOperand(1);
20402    SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20403    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20404    SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20405
20406    SDValue Values[2] = {Div, Rem};
20407    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20408  }
20409
  // Case 3: libcall.  The helper returns a struct {Ty, Ty} = {div, rem}.
20410  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20411                                       VT.getSimpleVT().SimpleTy);
20412  SDValue InChain = DAG.getEntryNode();
20415                                              DAG.getContext(),
20416                                              Subtarget);
20417
20420
20421  Type *RetTy = StructType::get(Ty, Ty);
20422
  // Windows requires an explicit divide-by-zero check before calling the
  // helper (see WinDBZCheckDenominator).
20423  if (Subtarget->isTargetWindows())
20424    InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20425
20426  TargetLowering::CallLoweringInfo CLI(DAG);
20427  CLI.setDebugLoc(dl).setChain(InChain)
20428    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20430
  // CallInfo.first carries both struct fields (div, rem) as merged values.
20431  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20432  return CallInfo.first;
20433}
20434
20435// Lowers REM using divmod helpers
20436// see RTABI section 4.2/4.3
// Lower ISD::SREM / ISD::UREM by calling the divmod helper and discarding the
// quotient.  An i64 remainder by a constant divisor is instead expanded via
// 32-bit halves.
// NOTE(review): this extract omits a few original lines (20441 declaring
// 'Result', 20467/20470-71 building 'Args'/'Callee', 20480 finishing the CLI
// setter chain); comments describe only the visible code.
20437SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20438  EVT VT = N->getValueType(0);
20439
  // i64 rem-by-constant: expand with i32 math and rebuild the i64 remainder
  // from its two halves.
20440  if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20442    if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20443      return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20444                         Result[0], Result[1]);
20445  }
20446
20447  // Build return types (div and rem)
20448  std::vector<Type*> RetTyParams;
20449  Type *RetTyElement;
20450
20451  switch (VT.getSimpleVT().SimpleTy) {
20452  default: llvm_unreachable("Unexpected request for libcall!");
20453  case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20454  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20455  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20456  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20457  }
20458
  // The helper returns a homogeneous two-element struct {T, T} = {div, rem}.
20459  RetTyParams.push_back(RetTyElement);
20460  RetTyParams.push_back(RetTyElement);
20461  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20462  Type *RetTy = StructType::get(*DAG.getContext(), ret);
20463
20464  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20465                                                                  SimpleTy);
20466  SDValue InChain = DAG.getEntryNode();
20468                                                  Subtarget);
20469  bool isSigned = N->getOpcode() == ISD::SREM;
20472
  // Windows requires an explicit divide-by-zero check before the call.
20473  if (Subtarget->isTargetWindows())
20474    InChain = WinDBZCheckDenominator(DAG, N, InChain);
20475
20476  // Lower call
20477  CallLoweringInfo CLI(DAG);
20478  CLI.setChain(InChain)
20479     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20481  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20482
20483  // Return second (rem) result operand (first contains div)
20484  SDNode *ResNode = CallResult.first.getNode();
20485  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20486  return ResNode->getOperand(1);
20487}
20488
// Lower ISD::DYNAMIC_STACKALLOC for Windows on ARM.  Windows requires the
// stack to be probed page-by-page when growing, so unless the function opts
// out (the condition on the omitted line 20498 ends with the
// "no-stack-arg-probe" attribute string visible at 20499), the allocation is
// routed through the __chkstk-style WIN__CHKSTK node.
// Returns a merged {new SP value, chain} pair.
20489SDValue
20490ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20491  assert(Subtarget->isTargetWindows() && "unsupported target platform");
20492  SDLoc DL(Op);
20493
20494  // Get the inputs.
20495  SDValue Chain = Op.getOperand(0);
20496  SDValue Size = Op.getOperand(1);
20497
  // Fast path (probing disabled): adjust SP directly, applying the requested
  // alignment (operand 2) by masking SP down with -Align.
20499          "no-stack-arg-probe")) {
20500    MaybeAlign Align =
20501        cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20502    SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20503    Chain = SP.getValue(1);
20504    SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20505    if (Align)
20506      SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20507                       DAG.getSignedConstant(-Align->value(), DL, MVT::i32));
20508    Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20509    SDValue Ops[2] = { SP, Chain };
20510    return DAG.getMergeValues(Ops, DL);
20511  }
20512
  // Probed path: the allocation size is passed in words (bytes >> 2) in R4,
  // glued to the WIN__CHKSTK call, which adjusts SP itself.
20513  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20514                              DAG.getConstant(2, DL, MVT::i32));
20515
20516  SDValue Glue;
20517  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20518  Glue = Chain.getValue(1);
20519
20520  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20521  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20522
  // Read back the SP value the chkstk call produced.
20523  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20524  Chain = NewSP.getValue(1);
20525
20526  SDValue Ops[2] = { NewSP, Chain };
20527  return DAG.getMergeValues(Ops, DL);
20528}
20529
20530SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20531 bool IsStrict = Op->isStrictFPOpcode();
20532 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20533 const unsigned DstSz = Op.getValueType().getSizeInBits();
20534 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20535 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20536 "Unexpected type for custom-lowering FP_EXTEND");
20537
20538 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20539 "With both FP DP and 16, any FP conversion is legal!");
20540
20541 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20542 "With FP16, 16 to 32 conversion is legal!");
20543
20544 // Converting from 32 -> 64 is valid if we have FP64.
20545 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20546 // FIXME: Remove this when we have strict fp instruction selection patterns
20547 if (IsStrict) {
20548 SDLoc Loc(Op);
20549 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20550 Loc, Op.getValueType(), SrcVal);
20551 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20552 }
20553 return Op;
20554 }
20555
20556 // Either we are converting from 16 -> 64, without FP16 and/or
20557 // FP.double-precision or without Armv8-fp. So we must do it in two
20558 // steps.
20559 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20560 // without FP16. So we must do a function call.
20561 SDLoc Loc(Op);
20562 RTLIB::Libcall LC;
20563 MakeLibCallOptions CallOptions;
20564 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20565 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20566 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20567 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20568 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20569 if (Supported) {
20570 if (IsStrict) {
20571 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20572 {DstVT, MVT::Other}, {Chain, SrcVal});
20573 Chain = SrcVal.getValue(1);
20574 } else {
20575 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20576 }
20577 } else {
20578 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20579 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20580 "Unexpected type for custom-lowering FP_EXTEND");
20581 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20582 Loc, Chain);
20583 }
20584 }
20585
20586 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20587}
20588
// Lower ISD::FP_ROUND (and STRICT_FP_ROUND): narrow an FP value
// (64->32, 64->16, or 32->16).  A 32->16 round with FP16 hardware is already
// legal and returned unchanged; everything else becomes a __trunc* libcall.
// NOTE(review): the declaration of 'Result' (original line 20616) is not
// shown in this extract.
20589SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20590  bool IsStrict = Op->isStrictFPOpcode();
20591
  // Strict nodes have the chain at operand 0, the value at operand 1.
20592  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20593  EVT SrcVT = SrcVal.getValueType();
20594  EVT DstVT = Op.getValueType();
20595  const unsigned DstSz = Op.getValueType().getSizeInBits();
20596  const unsigned SrcSz = SrcVT.getSizeInBits();
  // DstSz is only used inside the assert below; silence -Wunused in NDEBUG.
20597  (void)DstSz;
20598  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20599         "Unexpected type for custom-lowering FP_ROUND");
20600
20601  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20602         "With both FP DP and 16, any FP conversion is legal!");
20603
20604  SDLoc Loc(Op);
20605
20606  // Instruction from 32 -> 16 if hasFP16 is valid
20607  if (SrcSz == 32 && Subtarget->hasFP16())
20608    return Op;
20609
20610  // Lib call from 32 -> 16 / 64 -> [32, 16]
20611  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20612  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20613         "Unexpected type for custom-lowering FP_ROUND");
20614  MakeLibCallOptions CallOptions;
20615  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20617  std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20618                                        Loc, Chain);
  // Strict form must also return the updated chain.
20619  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20620}
20621
20622bool
20624 // The ARM target isn't yet aware of offsets.
20625 return false;
20626}
20627
20629 if (v == 0xffffffff)
20630 return false;
20631
20632 // there can be 1's on either or both "outsides", all the "inside"
20633 // bits must be 0's
20634 return isShiftedMask_32(~v);
20635}
20636
20637/// isFPImmLegal - Returns true if the target can instruction select the
20638/// specified FP immediate natively. If false, the legalizer will
20639/// materialize the FP immediate as a load from a constant pool.
20641 bool ForCodeSize) const {
20642 if (!Subtarget->hasVFP3Base())
20643 return false;
20644 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20645 return ARM_AM::getFP16Imm(Imm) != -1;
20646 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20647 ARM_AM::getFP32FP16Imm(Imm) != -1)
20648 return true;
20649 if (VT == MVT::f32)
20650 return ARM_AM::getFP32Imm(Imm) != -1;
20651 if (VT == MVT::f64 && Subtarget->hasFP64())
20652 return ARM_AM::getFP64Imm(Imm) != -1;
20653 return false;
20654}
20655
20656/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20657/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20658/// specified in the intrinsic calls.
20660 const CallInst &I,
20661 MachineFunction &MF,
20662 unsigned Intrinsic) const {
20663 switch (Intrinsic) {
20664 case Intrinsic::arm_neon_vld1:
20665 case Intrinsic::arm_neon_vld2:
20666 case Intrinsic::arm_neon_vld3:
20667 case Intrinsic::arm_neon_vld4:
20668 case Intrinsic::arm_neon_vld2lane:
20669 case Intrinsic::arm_neon_vld3lane:
20670 case Intrinsic::arm_neon_vld4lane:
20671 case Intrinsic::arm_neon_vld2dup:
20672 case Intrinsic::arm_neon_vld3dup:
20673 case Intrinsic::arm_neon_vld4dup: {
20674 Info.opc = ISD::INTRINSIC_W_CHAIN;
20675 // Conservatively set memVT to the entire set of vectors loaded.
20676 auto &DL = I.getDataLayout();
20677 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20678 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20679 Info.ptrVal = I.getArgOperand(0);
20680 Info.offset = 0;
20681 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20682 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20683 // volatile loads with NEON intrinsics not supported
20684 Info.flags = MachineMemOperand::MOLoad;
20685 return true;
20686 }
20687 case Intrinsic::arm_neon_vld1x2:
20688 case Intrinsic::arm_neon_vld1x3:
20689 case Intrinsic::arm_neon_vld1x4: {
20690 Info.opc = ISD::INTRINSIC_W_CHAIN;
20691 // Conservatively set memVT to the entire set of vectors loaded.
20692 auto &DL = I.getDataLayout();
20693 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20694 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20695 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20696 Info.offset = 0;
20697 Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
20698 // volatile loads with NEON intrinsics not supported
20699 Info.flags = MachineMemOperand::MOLoad;
20700 return true;
20701 }
20702 case Intrinsic::arm_neon_vst1:
20703 case Intrinsic::arm_neon_vst2:
20704 case Intrinsic::arm_neon_vst3:
20705 case Intrinsic::arm_neon_vst4:
20706 case Intrinsic::arm_neon_vst2lane:
20707 case Intrinsic::arm_neon_vst3lane:
20708 case Intrinsic::arm_neon_vst4lane: {
20709 Info.opc = ISD::INTRINSIC_VOID;
20710 // Conservatively set memVT to the entire set of vectors stored.
20711 auto &DL = I.getDataLayout();
20712 unsigned NumElts = 0;
20713 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20714 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20715 if (!ArgTy->isVectorTy())
20716 break;
20717 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20718 }
20719 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20720 Info.ptrVal = I.getArgOperand(0);
20721 Info.offset = 0;
20722 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20723 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20724 // volatile stores with NEON intrinsics not supported
20725 Info.flags = MachineMemOperand::MOStore;
20726 return true;
20727 }
20728 case Intrinsic::arm_neon_vst1x2:
20729 case Intrinsic::arm_neon_vst1x3:
20730 case Intrinsic::arm_neon_vst1x4: {
20731 Info.opc = ISD::INTRINSIC_VOID;
20732 // Conservatively set memVT to the entire set of vectors stored.
20733 auto &DL = I.getDataLayout();
20734 unsigned NumElts = 0;
20735 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20736 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20737 if (!ArgTy->isVectorTy())
20738 break;
20739 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20740 }
20741 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20742 Info.ptrVal = I.getArgOperand(0);
20743 Info.offset = 0;
20744 Info.align = I.getParamAlign(0).valueOrOne();
20745 // volatile stores with NEON intrinsics not supported
20746 Info.flags = MachineMemOperand::MOStore;
20747 return true;
20748 }
20749 case Intrinsic::arm_mve_vld2q:
20750 case Intrinsic::arm_mve_vld4q: {
20751 Info.opc = ISD::INTRINSIC_W_CHAIN;
20752 // Conservatively set memVT to the entire set of vectors loaded.
20753 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
20754 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
20755 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20756 Info.ptrVal = I.getArgOperand(0);
20757 Info.offset = 0;
20758 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20759 // volatile loads with MVE intrinsics not supported
20760 Info.flags = MachineMemOperand::MOLoad;
20761 return true;
20762 }
20763 case Intrinsic::arm_mve_vst2q:
20764 case Intrinsic::arm_mve_vst4q: {
20765 Info.opc = ISD::INTRINSIC_VOID;
20766 // Conservatively set memVT to the entire set of vectors stored.
20767 Type *VecTy = I.getArgOperand(1)->getType();
20768 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
20769 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20770 Info.ptrVal = I.getArgOperand(0);
20771 Info.offset = 0;
20772 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20773 // volatile stores with MVE intrinsics not supported
20774 Info.flags = MachineMemOperand::MOStore;
20775 return true;
20776 }
20777 case Intrinsic::arm_mve_vldr_gather_base:
20778 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
20779 Info.opc = ISD::INTRINSIC_W_CHAIN;
20780 Info.ptrVal = nullptr;
20781 Info.memVT = MVT::getVT(I.getType());
20782 Info.align = Align(1);
20783 Info.flags |= MachineMemOperand::MOLoad;
20784 return true;
20785 }
20786 case Intrinsic::arm_mve_vldr_gather_base_wb:
20787 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
20788 Info.opc = ISD::INTRINSIC_W_CHAIN;
20789 Info.ptrVal = nullptr;
20790 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
20791 Info.align = Align(1);
20792 Info.flags |= MachineMemOperand::MOLoad;
20793 return true;
20794 }
20795 case Intrinsic::arm_mve_vldr_gather_offset:
20796 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
20797 Info.opc = ISD::INTRINSIC_W_CHAIN;
20798 Info.ptrVal = nullptr;
20799 MVT DataVT = MVT::getVT(I.getType());
20800 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
20801 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
20802 DataVT.getVectorNumElements());
20803 Info.align = Align(1);
20804 Info.flags |= MachineMemOperand::MOLoad;
20805 return true;
20806 }
20807 case Intrinsic::arm_mve_vstr_scatter_base:
20808 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
20809 Info.opc = ISD::INTRINSIC_VOID;
20810 Info.ptrVal = nullptr;
20811 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
20812 Info.align = Align(1);
20813 Info.flags |= MachineMemOperand::MOStore;
20814 return true;
20815 }
20816 case Intrinsic::arm_mve_vstr_scatter_base_wb:
20817 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
20818 Info.opc = ISD::INTRINSIC_W_CHAIN;
20819 Info.ptrVal = nullptr;
20820 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
20821 Info.align = Align(1);
20822 Info.flags |= MachineMemOperand::MOStore;
20823 return true;
20824 }
20825 case Intrinsic::arm_mve_vstr_scatter_offset:
20826 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
20827 Info.opc = ISD::INTRINSIC_VOID;
20828 Info.ptrVal = nullptr;
20829 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
20830 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
20831 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
20832 DataVT.getVectorNumElements());
20833 Info.align = Align(1);
20834 Info.flags |= MachineMemOperand::MOStore;
20835 return true;
20836 }
20837 case Intrinsic::arm_ldaex:
20838 case Intrinsic::arm_ldrex: {
20839 auto &DL = I.getDataLayout();
20840 Type *ValTy = I.getParamElementType(0);
20841 Info.opc = ISD::INTRINSIC_W_CHAIN;
20842 Info.memVT = MVT::getVT(ValTy);
20843 Info.ptrVal = I.getArgOperand(0);
20844 Info.offset = 0;
20845 Info.align = DL.getABITypeAlign(ValTy);
20847 return true;
20848 }
20849 case Intrinsic::arm_stlex:
20850 case Intrinsic::arm_strex: {
20851 auto &DL = I.getDataLayout();
20852 Type *ValTy = I.getParamElementType(1);
20853 Info.opc = ISD::INTRINSIC_W_CHAIN;
20854 Info.memVT = MVT::getVT(ValTy);
20855 Info.ptrVal = I.getArgOperand(1);
20856 Info.offset = 0;
20857 Info.align = DL.getABITypeAlign(ValTy);
20859 return true;
20860 }
20861 case Intrinsic::arm_stlexd:
20862 case Intrinsic::arm_strexd:
20863 Info.opc = ISD::INTRINSIC_W_CHAIN;
20864 Info.memVT = MVT::i64;
20865 Info.ptrVal = I.getArgOperand(2);
20866 Info.offset = 0;
20867 Info.align = Align(8);
20869 return true;
20870
20871 case Intrinsic::arm_ldaexd:
20872 case Intrinsic::arm_ldrexd:
20873 Info.opc = ISD::INTRINSIC_W_CHAIN;
20874 Info.memVT = MVT::i64;
20875 Info.ptrVal = I.getArgOperand(0);
20876 Info.offset = 0;
20877 Info.align = Align(8);
20879 return true;
20880
20881 default:
20882 break;
20883 }
20884
20885 return false;
20886}
20887
20888/// Returns true if it is beneficial to convert a load of a constant
20889/// to just the constant itself.
20891 Type *Ty) const {
20892 assert(Ty->isIntegerTy());
20893
20894 unsigned Bits = Ty->getPrimitiveSizeInBits();
20895 if (Bits == 0 || Bits > 32)
20896 return false;
20897 return true;
20898}
20899
20901 unsigned Index) const {
20903 return false;
20904
20905 return (Index == 0 || Index == ResVT.getVectorNumElements());
20906}
20907
20909 ARM_MB::MemBOpt Domain) const {
20910 // First, if the target has no DMB, see what fallback we can use.
20911 if (!Subtarget->hasDataBarrier()) {
20912 // Some ARMv6 cpus can support data barriers with an mcr instruction.
20913 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
20914 // here.
20915 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
20916 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
20917 Builder.getInt32(0), Builder.getInt32(7),
20918 Builder.getInt32(10), Builder.getInt32(5)};
20919 return Builder.CreateIntrinsic(Intrinsic::arm_mcr, args);
20920 } else {
20921 // Instead of using barriers, atomic accesses on these subtargets use
20922 // libcalls.
20923 llvm_unreachable("makeDMB on a target so old that it has no barriers");
20924 }
20925 } else {
20926 // Only a full system barrier exists in the M-class architectures.
20927 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
20928 Constant *CDomain = Builder.getInt32(Domain);
20929 return Builder.CreateIntrinsic(Intrinsic::arm_dmb, CDomain);
20930 }
20931}
20932
20933// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
20935 Instruction *Inst,
20936 AtomicOrdering Ord) const {
20937 switch (Ord) {
20940 llvm_unreachable("Invalid fence: unordered/non-atomic");
20943 return nullptr; // Nothing to do
20945 if (!Inst->hasAtomicStore())
20946 return nullptr; // Nothing to do
20947 [[fallthrough]];
20950 if (Subtarget->preferISHSTBarriers())
20951 return makeDMB(Builder, ARM_MB::ISHST);
20952 // FIXME: add a comment with a link to documentation justifying this.
20953 else
20954 return makeDMB(Builder, ARM_MB::ISH);
20955 }
20956 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
20957}
20958
20960 Instruction *Inst,
20961 AtomicOrdering Ord) const {
20962 switch (Ord) {
20965 llvm_unreachable("Invalid fence: unordered/not-atomic");
20968 return nullptr; // Nothing to do
20972 return makeDMB(Builder, ARM_MB::ISH);
20973 }
20974 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
20975}
20976
20977// Loads and stores less than 64-bits are already atomic; ones above that
20978// are doomed anyway, so defer to the default libcall and blame the OS when
20979// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
20980// anything for those.
20983 bool has64BitAtomicStore;
20984 if (Subtarget->isMClass())
20985 has64BitAtomicStore = false;
20986 else if (Subtarget->isThumb())
20987 has64BitAtomicStore = Subtarget->hasV7Ops();
20988 else
20989 has64BitAtomicStore = Subtarget->hasV6Ops();
20990
20991 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
20992 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
20994}
20995
20996// Loads and stores less than 64-bits are already atomic; ones above that
20997// are doomed anyway, so defer to the default libcall and blame the OS when
20998// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
20999// anything for those.
21000// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21001// guarantee, see DDI0406C ARM architecture reference manual,
21002// sections A8.8.72-74 LDRD)
21005 bool has64BitAtomicLoad;
21006 if (Subtarget->isMClass())
21007 has64BitAtomicLoad = false;
21008 else if (Subtarget->isThumb())
21009 has64BitAtomicLoad = Subtarget->hasV7Ops();
21010 else
21011 has64BitAtomicLoad = Subtarget->hasV6Ops();
21012
21013 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21014 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21016}
21017
21018// For the real atomic operations, we have ldrex/strex up to 32 bits,
21019// and up to 64 bits on the non-M profiles
21022 if (AI->isFloatingPointOperation())
21024
21025 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21026 bool hasAtomicRMW;
21027 if (Subtarget->isMClass())
21028 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21029 else if (Subtarget->isThumb())
21030 hasAtomicRMW = Subtarget->hasV7Ops();
21031 else
21032 hasAtomicRMW = Subtarget->hasV6Ops();
21033 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21034 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21035 // implement atomicrmw without spilling. If the target address is also on
21036 // the stack and close enough to the spill slot, this can lead to a
21037 // situation where the monitor always gets cleared and the atomic operation
21038 // can never succeed. So at -O0 lower this operation to a CAS loop.
21039 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21042 }
21044}
21045
21046// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21047// bits, and up to 64 bits on the non-M profiles.
21050 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21051 // implement cmpxchg without spilling. If the address being exchanged is also
21052 // on the stack and close enough to the spill slot, this can lead to a
21053 // situation where the monitor always gets cleared and the atomic operation
21054 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21055 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21056 bool HasAtomicCmpXchg;
21057 if (Subtarget->isMClass())
21058 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21059 else if (Subtarget->isThumb())
21060 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21061 else
21062 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21063 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21064 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21067}
21068
21070 const Instruction *I) const {
21071 return InsertFencesForAtomic;
21072}
21073
21075 // ROPI/RWPI are not supported currently.
21076 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21077}
21078
21080 // MSVC CRT provides functionalities for stack protection.
21081 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
21082 getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
21083
21084 RTLIB::LibcallImpl SecurityCookieVar =
21085 getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
21086 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
21087 SecurityCookieVar != RTLIB::Unsupported) {
21088 // MSVC CRT has a global variable holding security cookie.
21089 M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
21090 PointerType::getUnqual(M.getContext()));
21091
21092 // MSVC CRT has a function to validate security cookie.
21093 FunctionCallee SecurityCheckCookie =
21094 M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
21095 Type::getVoidTy(M.getContext()),
21096 PointerType::getUnqual(M.getContext()));
21097 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21098 F->addParamAttr(0, Attribute::AttrKind::InReg);
21099 }
21100
21102}
21103
21105 unsigned &Cost) const {
21106 // If we do not have NEON, vector types are not natively supported.
21107 if (!Subtarget->hasNEON())
21108 return false;
21109
21110 // Floating point values and vector values map to the same register file.
21111 // Therefore, although we could do a store extract of a vector type, this is
21112 // better to leave at float as we have more freedom in the addressing mode for
21113 // those.
21114 if (VectorTy->isFPOrFPVectorTy())
21115 return false;
21116
21117 // If the index is unknown at compile time, this is very expensive to lower
21118 // and it is not possible to combine the store with the extract.
21119 if (!isa<ConstantInt>(Idx))
21120 return false;
21121
21122 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21123 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21124 // We can do a store + vector extract on any vector that fits perfectly in a D
21125 // or Q register.
21126 if (BitWidth == 64 || BitWidth == 128) {
21127 Cost = 0;
21128 return true;
21129 }
21130 return false;
21131}
21132
21134 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21135 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21136 unsigned Opcode = Op.getOpcode();
21137 switch (Opcode) {
21138 case ARMISD::VORRIMM:
21139 case ARMISD::VBICIMM:
21140 return false;
21141 }
21143 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21144}
21145
21147 return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
21148}
21149
21151 return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
21152}
21153
21155 const Instruction &AndI) const {
21156 if (!Subtarget->hasV7Ops())
21157 return false;
21158
21159 // Sink the `and` instruction only if the mask would fit into a modified
21160 // immediate operand.
21162 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21163 return false;
21164 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21165 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21166 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21167}
21168
21171 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21172 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21175 ExpansionFactor);
21176}
21177
21179 Value *Addr,
21180 AtomicOrdering Ord) const {
21181 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21182 bool IsAcquire = isAcquireOrStronger(Ord);
21183
21184 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21185 // intrinsic must return {i32, i32} and we have to recombine them into a
21186 // single i64 here.
21187 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21189 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21190
21191 Value *LoHi =
21192 Builder.CreateIntrinsic(Int, Addr, /*FMFSource=*/nullptr, "lohi");
21193
21194 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21195 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21196 if (!Subtarget->isLittle())
21197 std::swap (Lo, Hi);
21198 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21199 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21200 return Builder.CreateOr(
21201 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21202 }
21203
21204 Type *Tys[] = { Addr->getType() };
21205 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21206 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
21207
21208 CI->addParamAttr(
21209 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21210 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21211}
21212
21214 IRBuilderBase &Builder) const {
21215 if (!Subtarget->hasV7Ops())
21216 return;
21217 Builder.CreateIntrinsic(Intrinsic::arm_clrex, {});
21218}
21219
21221 Value *Val, Value *Addr,
21222 AtomicOrdering Ord) const {
21223 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21224 bool IsRelease = isReleaseOrStronger(Ord);
21225
21226 // Since the intrinsics must have legal type, the i64 intrinsics take two
21227 // parameters: "i32, i32". We must marshal Val into the appropriate form
21228 // before the call.
21229 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21231 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21232 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21233
21234 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21235 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21236 if (!Subtarget->isLittle())
21237 std::swap(Lo, Hi);
21238 return Builder.CreateIntrinsic(Int, {Lo, Hi, Addr});
21239 }
21240
21241 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21242 Type *Tys[] = { Addr->getType() };
21244
21245 CallInst *CI = Builder.CreateCall(
21246 Strex, {Builder.CreateZExtOrBitCast(
21247 Val, Strex->getFunctionType()->getParamType(0)),
21248 Addr});
21249 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21250 Val->getType()));
21251 return CI;
21252}
21253
21254
21256 return Subtarget->isMClass();
21257}
21258
21259/// A helper function for determining the number of interleaved accesses we
21260/// will generate when lowering accesses of the given type.
21261unsigned
21263 const DataLayout &DL) const {
21264 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21265}
21266
21268 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21269 const DataLayout &DL) const {
21270
21271 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21272 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21273
21274 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21275 return false;
21276
21277 // Ensure the vector doesn't have f16 elements. Even though we could do an
21278 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21279 // f32.
21280 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21281 return false;
21282 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21283 return false;
21284
21285 // Ensure the number of vector elements is greater than 1.
21286 if (VecTy->getNumElements() < 2)
21287 return false;
21288
21289 // Ensure the element type is legal.
21290 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21291 return false;
21292 // And the alignment if high enough under MVE.
21293 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21294 return false;
21295
21296 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21297 // 128 will be split into multiple interleaved accesses.
21298 if (Subtarget->hasNEON() && VecSize == 64)
21299 return true;
21300 return VecSize % 128 == 0;
21301}
21302
21304 if (Subtarget->hasNEON())
21305 return 4;
21306 if (Subtarget->hasMVEIntegerOps())
21309}
21310
21311/// Lower an interleaved load into a vldN intrinsic.
21312///
21313/// E.g. Lower an interleaved load (Factor = 2):
21314/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21315/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21316/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21317///
21318/// Into:
21319/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21320/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21321/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21323 Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
21324 ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const {
21325 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21326 "Invalid interleave factor");
21327 assert(!Shuffles.empty() && "Empty shufflevector input");
21328 assert(Shuffles.size() == Indices.size() &&
21329 "Unmatched number of shufflevectors and indices");
21330
21331 auto *LI = dyn_cast<LoadInst>(Load);
21332 if (!LI)
21333 return false;
21334 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
21335
21336 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21337 Type *EltTy = VecTy->getElementType();
21338
21339 const DataLayout &DL = LI->getDataLayout();
21340 Align Alignment = LI->getAlign();
21341
21342 // Skip if we do not have NEON and skip illegal vector types. We can
21343 // "legalize" wide vector types into multiple interleaved accesses as long as
21344 // the vector types are divisible by 128.
21345 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21346 return false;
21347
21348 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21349
21350 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21351 // load integer vectors first and then convert to pointer vectors.
21352 if (EltTy->isPointerTy())
21353 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21354
21355 IRBuilder<> Builder(LI);
21356
21357 // The base address of the load.
21358 Value *BaseAddr = LI->getPointerOperand();
21359
21360 if (NumLoads > 1) {
21361 // If we're going to generate more than one load, reset the sub-vector type
21362 // to something legal.
21363 VecTy = FixedVectorType::get(VecTy->getElementType(),
21364 VecTy->getNumElements() / NumLoads);
21365 }
21366
21367 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21368
21369 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21370 if (Subtarget->hasNEON()) {
21371 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21372 Type *Tys[] = {VecTy, PtrTy};
21373 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21374 Intrinsic::arm_neon_vld3,
21375 Intrinsic::arm_neon_vld4};
21376
21378 Ops.push_back(BaseAddr);
21379 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21380
21381 return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,
21382 /*FMFSource=*/nullptr, "vldN");
21383 } else {
21384 assert((Factor == 2 || Factor == 4) &&
21385 "expected interleave factor of 2 or 4 for MVE");
21386 Intrinsic::ID LoadInts =
21387 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21388 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21389 Type *Tys[] = {VecTy, PtrTy};
21390
21392 Ops.push_back(BaseAddr);
21393 return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,
21394 "vldN");
21395 }
21396 };
21397
21398 // Holds sub-vectors extracted from the load intrinsic return values. The
21399 // sub-vectors are associated with the shufflevector instructions they will
21400 // replace.
21402
21403 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21404 // If we're generating more than one load, compute the base address of
21405 // subsequent loads as an offset from the previous.
21406 if (LoadCount > 0)
21407 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21408 VecTy->getNumElements() * Factor);
21409
21410 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21411
21412 // Replace uses of each shufflevector with the corresponding vector loaded
21413 // by ldN.
21414 for (unsigned i = 0; i < Shuffles.size(); i++) {
21415 ShuffleVectorInst *SV = Shuffles[i];
21416 unsigned Index = Indices[i];
21417
21418 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21419
21420 // Convert the integer vector to pointer vector if the element is pointer.
21421 if (EltTy->isPointerTy())
21422 SubVec = Builder.CreateIntToPtr(
21423 SubVec,
21425
21426 SubVecs[SV].push_back(SubVec);
21427 }
21428 }
21429
21430 // Replace uses of the shufflevector instructions with the sub-vectors
21431 // returned by the load intrinsic. If a shufflevector instruction is
21432 // associated with more than one sub-vector, those sub-vectors will be
21433 // concatenated into a single wide vector.
21434 for (ShuffleVectorInst *SVI : Shuffles) {
21435 auto &SubVec = SubVecs[SVI];
21436 auto *WideVec =
21437 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21438 SVI->replaceAllUsesWith(WideVec);
21439 }
21440
21441 return true;
21442}
21443
21444/// Lower an interleaved store into a vstN intrinsic.
21445///
21446/// E.g. Lower an interleaved store (Factor = 3):
21447/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21448/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21449/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21450///
21451/// Into:
21452/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21453/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21454/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21455/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21456///
21457/// Note that the new shufflevectors will be removed and we'll only generate one
21458/// vst3 instruction in CodeGen.
21459///
21460/// Example for a more general valid mask (Factor 3). Lower:
21461/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21462/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21463/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21464///
21465/// Into:
21466/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21467/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21468/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21469/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21471 Value *LaneMask,
21472 ShuffleVectorInst *SVI,
21473 unsigned Factor,
21474 const APInt &GapMask) const {
21475 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21476 "Invalid interleave factor");
21477 auto *SI = dyn_cast<StoreInst>(Store);
21478 if (!SI)
21479 return false;
21480 assert(!LaneMask && GapMask.popcount() == Factor &&
21481 "Unexpected mask on store");
21482
21483 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21484 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21485
21486 unsigned LaneLen = VecTy->getNumElements() / Factor;
21487 Type *EltTy = VecTy->getElementType();
21488 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21489
21490 const DataLayout &DL = SI->getDataLayout();
21491 Align Alignment = SI->getAlign();
21492
21493 // Skip if we do not have NEON and skip illegal vector types. We can
21494 // "legalize" wide vector types into multiple interleaved accesses as long as
21495 // the vector types are divisible by 128.
21496 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21497 return false;
21498
21499 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21500
21501 Value *Op0 = SVI->getOperand(0);
21502 Value *Op1 = SVI->getOperand(1);
21503 IRBuilder<> Builder(SI);
21504
21505 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21506 // vectors to integer vectors.
21507 if (EltTy->isPointerTy()) {
21508 Type *IntTy = DL.getIntPtrType(EltTy);
21509
21510 // Convert to the corresponding integer vector.
21511 auto *IntVecTy =
21513 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21514 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21515
21516 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21517 }
21518
21519 // The base address of the store.
21520 Value *BaseAddr = SI->getPointerOperand();
21521
21522 if (NumStores > 1) {
21523 // If we're going to generate more than one store, reset the lane length
21524 // and sub-vector type to something legal.
21525 LaneLen /= NumStores;
21526 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21527 }
21528
21529 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21530
21531 auto Mask = SVI->getShuffleMask();
21532
21533 auto createStoreIntrinsic = [&](Value *BaseAddr,
21534 SmallVectorImpl<Value *> &Shuffles) {
21535 if (Subtarget->hasNEON()) {
21536 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21537 Intrinsic::arm_neon_vst3,
21538 Intrinsic::arm_neon_vst4};
21539 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21540 Type *Tys[] = {PtrTy, SubVecTy};
21541
21543 Ops.push_back(BaseAddr);
21544 append_range(Ops, Shuffles);
21545 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21546 Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);
21547 } else {
21548 assert((Factor == 2 || Factor == 4) &&
21549 "expected interleave factor of 2 or 4 for MVE");
21550 Intrinsic::ID StoreInts =
21551 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21552 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21553 Type *Tys[] = {PtrTy, SubVecTy};
21554
21556 Ops.push_back(BaseAddr);
21557 append_range(Ops, Shuffles);
21558 for (unsigned F = 0; F < Factor; F++) {
21559 Ops.push_back(Builder.getInt32(F));
21560 Builder.CreateIntrinsic(StoreInts, Tys, Ops);
21561 Ops.pop_back();
21562 }
21563 }
21564 };
21565
21566 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21567 // If we generating more than one store, we compute the base address of
21568 // subsequent stores as an offset from the previous.
21569 if (StoreCount > 0)
21570 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21571 BaseAddr, LaneLen * Factor);
21572
21573 SmallVector<Value *, 4> Shuffles;
21574
21575 // Split the shufflevector operands into sub vectors for the new vstN call.
21576 for (unsigned i = 0; i < Factor; i++) {
21577 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21578 if (Mask[IdxI] >= 0) {
21579 Shuffles.push_back(Builder.CreateShuffleVector(
21580 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21581 } else {
21582 unsigned StartMask = 0;
21583 for (unsigned j = 1; j < LaneLen; j++) {
21584 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21585 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21586 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21587 break;
21588 }
21589 }
21590 // Note: If all elements in a chunk are undefs, StartMask=0!
21591 // Note: Filling undef gaps with random elements is ok, since
21592 // those elements were being written anyway (with undefs).
21593 // In the case of all undefs we're defaulting to using elems from 0
21594 // Note: StartMask cannot be negative, it's checked in
21595 // isReInterleaveMask
21596 Shuffles.push_back(Builder.CreateShuffleVector(
21597 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21598 }
21599 }
21600
21601 createStoreIntrinsic(BaseAddr, Shuffles);
21602 }
21603 return true;
21604}
21605
21613
21615 uint64_t &Members) {
21616 if (auto *ST = dyn_cast<StructType>(Ty)) {
21617 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21618 uint64_t SubMembers = 0;
21619 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21620 return false;
21621 Members += SubMembers;
21622 }
21623 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21624 uint64_t SubMembers = 0;
21625 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21626 return false;
21627 Members += SubMembers * AT->getNumElements();
21628 } else if (Ty->isFloatTy()) {
21629 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21630 return false;
21631 Members = 1;
21632 Base = HA_FLOAT;
21633 } else if (Ty->isDoubleTy()) {
21634 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21635 return false;
21636 Members = 1;
21637 Base = HA_DOUBLE;
21638 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21639 Members = 1;
21640 switch (Base) {
21641 case HA_FLOAT:
21642 case HA_DOUBLE:
21643 return false;
21644 case HA_VECT64:
21645 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21646 case HA_VECT128:
21647 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21648 case HA_UNKNOWN:
21649 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21650 case 64:
21651 Base = HA_VECT64;
21652 return true;
21653 case 128:
21654 Base = HA_VECT128;
21655 return true;
21656 default:
21657 return false;
21658 }
21659 }
21660 }
21661
21662 return (Members > 0 && Members <= 4);
21663}
21664
21665/// Return the correct alignment for the current calling convention.
21667 Type *ArgTy, const DataLayout &DL) const {
21668 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21669 if (!ArgTy->isVectorTy())
21670 return ABITypeAlign;
21671
21672 // Avoid over-aligning vector parameters. It would require realigning the
21673 // stack and waste space for no real benefit.
21674 MaybeAlign StackAlign = DL.getStackAlignment();
21675 assert(StackAlign && "data layout string is missing stack alignment");
21676 return std::min(ABITypeAlign, *StackAlign);
21677}
21678
21679/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21680/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21681/// passing according to AAPCS rules.
21683 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21684 const DataLayout &DL) const {
21685 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21687 return false;
21688
21690 uint64_t Members = 0;
21691 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21692 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21693
21694 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21695 return IsHA || IsIntArray;
21696}
21697
21699 const Constant *PersonalityFn) const {
21700 // Platforms which do not use SjLj EH may return values in these registers
21701 // via the personality function.
21703 return EM == ExceptionHandling::SjLj ? Register() : ARM::R0;
21704}
21705
21707 const Constant *PersonalityFn) const {
21708 // Platforms which do not use SjLj EH may return values in these registers
21709 // via the personality function.
21711 return EM == ExceptionHandling::SjLj ? Register() : ARM::R1;
21712}
21713
21714void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21715 // Update IsSplitCSR in ARMFunctionInfo.
21716 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21717 AFI->setIsSplitCSR(true);
21718}
21719
21720void ARMTargetLowering::insertCopiesSplitCSR(
21721 MachineBasicBlock *Entry,
21722 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21723 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21724 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21725 if (!IStart)
21726 return;
21727
21728 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21729 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21730 MachineBasicBlock::iterator MBBI = Entry->begin();
21731 for (const MCPhysReg *I = IStart; *I; ++I) {
21732 const TargetRegisterClass *RC = nullptr;
21733 if (ARM::GPRRegClass.contains(*I))
21734 RC = &ARM::GPRRegClass;
21735 else if (ARM::DPRRegClass.contains(*I))
21736 RC = &ARM::DPRRegClass;
21737 else
21738 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
21739
21740 Register NewVR = MRI->createVirtualRegister(RC);
21741 // Create copy from CSR to a virtual register.
21742 // FIXME: this currently does not emit CFI pseudo-instructions, it works
21743 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
21744 // nounwind. If we want to generalize this later, we may need to emit
21745 // CFI pseudo-instructions.
21746 assert(Entry->getParent()->getFunction().hasFnAttribute(
21747 Attribute::NoUnwind) &&
21748 "Function should be nounwind in insertCopiesSplitCSR!");
21749 Entry->addLiveIn(*I);
21750 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
21751 .addReg(*I);
21752
21753 // Insert the copy-back instructions right before the terminator.
21754 for (auto *Exit : Exits)
21755 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
21756 TII->get(TargetOpcode::COPY), *I)
21757 .addReg(NewVR);
21758 }
21759}
21760
21765
21767 return Subtarget->hasMVEIntegerOps();
21768}
21769
21772 auto *VTy = dyn_cast<FixedVectorType>(Ty);
21773 if (!VTy)
21774 return false;
21775
21776 auto *ScalarTy = VTy->getScalarType();
21777 unsigned NumElements = VTy->getNumElements();
21778
21779 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
21780 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
21781 return false;
21782
21783 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
21784 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
21785 return Subtarget->hasMVEFloatOps();
21786
21788 return false;
21789
21790 return Subtarget->hasMVEIntegerOps() &&
21791 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
21792 ScalarTy->isIntegerTy(32));
21793}
21794
21796 static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM};
21797 return RCRegs;
21798}
21799
21802 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
21803 Value *Accumulator) const {
21804
21806
21807 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
21808
21809 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
21810
21811 if (TyWidth > 128) {
21812 int Stride = Ty->getNumElements() / 2;
21813 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
21814 auto SplitSeqVec = llvm::to_vector(SplitSeq);
21815 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
21816 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
21817
21818 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
21819 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
21820 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
21821 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
21822 Value *LowerSplitAcc = nullptr;
21823 Value *UpperSplitAcc = nullptr;
21824
21825 if (Accumulator) {
21826 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
21827 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
21828 }
21829
21830 auto *LowerSplitInt = createComplexDeinterleavingIR(
21831 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
21832 auto *UpperSplitInt = createComplexDeinterleavingIR(
21833 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
21834
21835 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
21836 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
21837 }
21838
21839 auto *IntTy = Type::getInt32Ty(B.getContext());
21840
21841 ConstantInt *ConstRotation = nullptr;
21842 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
21843 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
21844
21845 if (Accumulator)
21846 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
21847 {ConstRotation, Accumulator, InputB, InputA});
21848 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
21849 {ConstRotation, InputB, InputA});
21850 }
21851
21852 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
21853 // 1 means the value is not halved.
21854 auto *ConstHalving = ConstantInt::get(IntTy, 1);
21855
21857 ConstRotation = ConstantInt::get(IntTy, 0);
21859 ConstRotation = ConstantInt::get(IntTy, 1);
21860
21861 if (!ConstRotation)
21862 return nullptr; // Invalid rotation for arm_mve_vcaddq
21863
21864 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
21865 {ConstHalving, ConstRotation, InputA, InputB});
21866 }
21867
21868 return nullptr;
21869}
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
return SDValue()
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
constexpr MVT FlagsVT
Value type used for NZCV flags.
static bool isNegatedInteger(SDValue Op)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
constexpr LLT F64
constexpr LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT)
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations...
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, SDValue V1)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V)
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
cl::opt< unsigned > ArmMaxBaseUpdatesToCheck("arm-max-base-updates-to-check", cl::Hidden, cl::desc("Maximum number of base-updates to check generating postindex."), cl::init(64))
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines a TargetTransformInfoImplBase conforming object specific to the ARM target machine.
Function Alias Analysis false
Function Alias Analysis Results
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI)
static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI, Value *Offset, dxil::ResourceTypeInfo &RTI)
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
#define Check(C,...)
#define op(i)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
static Value * LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctpop of V before the specified instruction IP.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
nvptx lower args
uint64_t High
uint64_t IntrinsicInst * II
PowerPC Reduce CR logical Operation
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static constexpr int Concat[]
Value * RHS
Value * LHS
BinaryOperator * Mul
static bool isIntrinsic(const CallBase &Call, Intrinsic::ID ID)
The Input class is used to parse a yaml document into in-memory structs and vectors.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
APInt bitcastToAPInt() const
Definition APFloat.h:1335
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
unsigned logBase2() const
Definition APInt.h:1762
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
const ARMBaseRegisterInfo & getRegisterInfo() const
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
const Triple & getTargetTriple() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool useFPVFMx() const
bool isThumb2() const
bool isTargetWindows() const
bool hasBaseDSP() const
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
bool hasVFP2Base() const
bool useFPVFMx64() const
bool isLittle() const
bool useFPVFMx16() const
bool isMClass() const
bool useMulOps() const
Align getDualLoadStoreAlignment() const
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool lowerInterleavedStore(Instruction *Store, Value *Mask, ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved store into a vstN intrinsic.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved load into a vldN intrinsic.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool preferSelectsOverBooleanArithmetic(EVT VT) const override
Should we prefer selects to doing arithmetic on boolean types.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
bool supportKCFIBundles() const override
Return true if the target supports kcfi operand bundles.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy,Idx).
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
const ARMBaseTargetMachine & getTM() const
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
bool isFloatingPointOperation() const
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
The address of a basic block.
Definition Constants.h:899
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
AttributeList getAttributes() const
Return the attributes for this call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
bool isBigEndian() const
Definition DataLayout.h:208
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition DataLayout.h:237
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition DataLayout.h:295
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
unsigned size() const
Definition DenseMap.h:110
bool empty() const
Definition DenseMap.h:109
iterator begin()
Definition DenseMap.h:78
iterator end()
Definition DenseMap.h:81
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition Function.h:687
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
const GlobalValue * getGlobal() const
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
bool hasDLLImportStorageClass() const
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
bool is64BitVector() const
Return true if this is a 64-bit vector type.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
Properties which a MachineFunction may have at a given point in time.
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
MachineOperand * mop_iterator
iterator/begin/end - Iterate over all operands of a machine instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI void setIsRenamable(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const unsigned char * bytes_end() const
Definition StringRef.h:127
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
const unsigned char * bytes_begin() const
Definition StringRef.h:124
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual ArrayRef< MCPhysReg > getRoundingControlRegisters() const
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
ExceptionHandling getExceptionModel() const
Return the ExceptionHandling to use, considering TargetOptions and the Triple's default.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition Triple.h:439
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:295
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:225
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
Base class of all SIMD vector types.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
IteratorT end() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:531
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition ISDOpcodes.h:167
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:815
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:732
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition ISDOpcodes.h:122
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
static const int LAST_INDEXED_MODE
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:255
ExceptionHandling
Definition CodeGen.h:53
@ SjLj
setjmp/longjmp based exceptions
Definition CodeGen.h:56
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition STLExtras.h:1516
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr U AbsoluteValue(T X)
Return the absolute value of a signed integer, converted to the corresponding unsigned integer type.
Definition MathExtras.h:592
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
unsigned gettBLXrOpcode(const MachineFunction &MF)
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:761
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
EVT ArgVT
Usually the non-legalized type of the argument, which is the EVT corresponding to the OrigTy IR type.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:301
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:347
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
APInt getSignedMinValue() const
Return the minimal signed value possible given these KnownBits.
Definition KnownBits.h:135
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...