1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 1, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 2, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 4, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 8, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 16, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
134 }
135 template <unsigned Size, unsigned Max>
136 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
137 // Test if there is an appropriate addressing mode and check if the
138 // immediate fits.
139 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
140 if (Found) {
141 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
142 int64_t C = CI->getSExtValue();
143 if (C <= Max)
144 return true;
145 }
146 }
147
148 // Otherwise, base only, materialize address in register.
149 Base = N;
150 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
151 return true;
152 }
153
154 template<int Width>
155 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
156 SDValue &SignExtend, SDValue &DoShift) {
157 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
158 }
159
160 template<int Width>
161 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
162 SDValue &SignExtend, SDValue &DoShift) {
163 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
164 }
165
166 bool SelectExtractHigh(SDValue N, SDValue &Res) {
167 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
168 N = N->getOperand(0);
169 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
170 !isa<ConstantSDNode>(N->getOperand(1)))
171 return false;
172 EVT VT = N->getValueType(0);
173 EVT LVT = N->getOperand(0).getValueType();
174 unsigned Index = N->getConstantOperandVal(1);
175 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
176 Index != VT.getVectorNumElements())
177 return false;
178 Res = N->getOperand(0);
179 return true;
180 }
181
182 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
183 if (N.getOpcode() != AArch64ISD::VLSHR)
184 return false;
185 SDValue Op = N->getOperand(0);
186 EVT VT = Op.getValueType();
187 unsigned ShtAmt = N->getConstantOperandVal(1);
188 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
189 return false;
190
191 APInt Imm;
192 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0)
195 << Op.getOperand(1).getConstantOperandVal(1));
196 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
197 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0));
200 else
201 return false;
202
203 if (Imm != 1ULL << (ShtAmt - 1))
204 return false;
205
206 Res1 = Op.getOperand(0);
207 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
208 return true;
209 }
210
211 bool SelectDupZeroOrUndef(SDValue N) {
212 switch(N->getOpcode()) {
213 case ISD::UNDEF:
214 return true;
215 case AArch64ISD::DUP:
216 case ISD::SPLAT_VECTOR: {
217 auto Opnd0 = N->getOperand(0);
218 if (isNullConstant(Opnd0))
219 return true;
220 if (isNullFPConstant(Opnd0))
221 return true;
222 break;
223 }
224 default:
225 break;
226 }
227
228 return false;
229 }
230
231 bool SelectAny(SDValue) { return true; }
232
233 bool SelectDupZero(SDValue N) {
234 switch(N->getOpcode()) {
235 case AArch64ISD::DUP:
236 case ISD::SPLAT_VECTOR: {
237 auto Opnd0 = N->getOperand(0);
238 if (isNullConstant(Opnd0))
239 return true;
240 if (isNullFPConstant(Opnd0))
241 return true;
242 break;
243 }
244 }
245
246 return false;
247 }
248
249 template<MVT::SimpleValueType VT>
250 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
251 return SelectSVEAddSubImm(N, VT, Imm, Shift);
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT>
260 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVECpyDupImm(N, VT, Imm, Shift);
262 }
263
264 template <MVT::SimpleValueType VT, bool Invert = false>
265 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
266 return SelectSVELogicalImm(N, VT, Imm, Invert);
267 }
268
269 template <MVT::SimpleValueType VT>
270 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
271 return SelectSVEArithImm(N, VT, Imm);
272 }
273
274 template <unsigned Low, unsigned High, bool AllowSaturation = false>
275 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
276 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
277 }
278
279 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
280 if (N->getOpcode() != ISD::SPLAT_VECTOR)
281 return false;
282
283 EVT EltVT = N->getValueType(0).getVectorElementType();
284 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
285 /* High */ EltVT.getFixedSizeInBits(),
286 /* AllowSaturation */ true, Imm);
287 }
288
289 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
290 template<signed Min, signed Max, signed Scale, bool Shift>
291 bool SelectCntImm(SDValue N, SDValue &Imm) {
292 if (!isa<ConstantSDNode>(N))
293 return false;
294
295 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
296 if (Shift)
297 MulImm = 1LL << MulImm;
298
299 if ((MulImm % std::abs(Scale)) != 0)
300 return false;
301
302 MulImm /= Scale;
303 if ((MulImm >= Min) && (MulImm <= Max)) {
304 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
305 return true;
306 }
307
308 return false;
309 }
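// Worked example, with illustrative template arguments Min=1, Max=16,
// Scale=16, Shift=false: a constant multiplier of 64 divides evenly by 16 and
// 64/16 = 4 lies in [1, 16], so Imm becomes the i32 target constant 4; a
// multiplier of 24 fails the divisibility test and no immediate is produced.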
310
311 template <signed Max, signed Scale>
312 bool SelectEXTImm(SDValue N, SDValue &Imm) {
313 if (!isa<ConstantSDNode>(N))
314 return false;
315
316 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
317
318 if (MulImm >= 0 && MulImm <= Max) {
319 MulImm *= Scale;
320 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
321 return true;
322 }
323
324 return false;
325 }
326
327 template <unsigned BaseReg, unsigned Max>
328 bool ImmToReg(SDValue N, SDValue &Imm) {
329 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
330 uint64_t C = CI->getZExtValue();
331
332 if (C > Max)
333 return false;
334
335 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
336 return true;
337 }
338 return false;
339 }
340
341 /// Form sequences of consecutive 64/128-bit registers for use in NEON
342 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
343/// between 1 and 4 elements. If it contains a single element, that is returned
344 /// unchanged; otherwise a REG_SEQUENCE value is returned.
345 SDValue createDTuple(ArrayRef<SDValue> Vecs);
346 SDValue createQTuple(ArrayRef<SDValue> Vecs);
347 // Form a sequence of SVE registers for instructions using list of vectors,
348 // e.g. structured loads and stores (ldN, stN).
349 SDValue createZTuple(ArrayRef<SDValue> Vecs);
350
351 // Similar to above, except the register must start at a multiple of the
352 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
353 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
354
355 /// Generic helper for the createDTuple/createQTuple
356 /// functions. Those should almost always be called instead.
357 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
358 const unsigned SubRegs[]);
359
360 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
361
362 bool tryIndexedLoad(SDNode *N);
363
364 void SelectPtrauthAuth(SDNode *N);
365 void SelectPtrauthResign(SDNode *N);
366
367 bool trySelectStackSlotTagP(SDNode *N);
368 void SelectTagP(SDNode *N);
369
370 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
371 unsigned SubRegIdx);
372 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
375 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
376 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
377 unsigned Opc_rr, unsigned Opc_ri,
378 bool IsIntr = false);
379 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
380 unsigned Scale, unsigned Opc_ri,
381 unsigned Opc_rr);
382 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
383 bool IsZmMulti, unsigned Opcode,
384 bool HasPred = false);
385 void SelectPExtPair(SDNode *N, unsigned Opc);
386 void SelectWhilePair(SDNode *N, unsigned Opc);
387 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
388 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
389 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
391 bool IsTupleInput, unsigned Opc);
392 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
393
394 template <unsigned MaxIdx, unsigned Scale>
395 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
396 unsigned Op);
397 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
398 unsigned Op, unsigned MaxIdx, unsigned Scale,
399 unsigned BaseReg = 0);
400 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401 /// SVE Reg+Imm addressing mode.
402 template <int64_t Min, int64_t Max>
403 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
404 SDValue &OffImm);
405 /// SVE Reg+Reg address mode.
406 template <unsigned Scale>
407 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
408 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
409 }
410
411 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
412 unsigned Opc, uint32_t MaxImm);
413
414 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
415
416 template <unsigned MaxIdx, unsigned Scale>
417 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
418 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
419 }
420
421 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
426 unsigned Opc_rr, unsigned Opc_ri);
427 std::tuple<unsigned, SDValue, SDValue>
428 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
429 const SDValue &OldBase, const SDValue &OldOffset,
430 unsigned Scale);
431
432 bool tryBitfieldExtractOp(SDNode *N);
433 bool tryBitfieldExtractOpFromSExt(SDNode *N);
434 bool tryBitfieldInsertOp(SDNode *N);
435 bool tryBitfieldInsertInZeroOp(SDNode *N);
436 bool tryShiftAmountMod(SDNode *N);
437
438 bool tryReadRegister(SDNode *N);
439 bool tryWriteRegister(SDNode *N);
440
441 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
442 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
443
444 bool trySelectXAR(SDNode *N);
445
446// Include the pieces autogenerated from the target description.
447#include "AArch64GenDAGISel.inc"
448
449private:
450 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
451 SDValue &Shift);
452 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
453 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
454 SDValue &OffImm) {
455 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
456 }
457 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
458 unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &Offset, SDValue &SignExtend,
466 SDValue &DoShift);
467 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
471 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
472 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
473 SDValue &Offset, SDValue &SignExtend);
474
475 template<unsigned RegWidth>
476 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
477 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
478 }
479
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
481
482 template<unsigned RegWidth>
483 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
484 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
485 }
486
487 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
488 unsigned Width);
489
490 bool SelectCMP_SWAP(SDNode *N);
491
492 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
493 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
494 bool Negate);
495 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
496 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
497
498 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
499 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
500 bool AllowSaturation, SDValue &Imm);
501
502 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
503 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
504 SDValue &Offset);
505 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
506 SDValue &Offset, unsigned Scale = 1);
507
508 bool SelectAllActivePredicate(SDValue N);
509 bool SelectAnyPredicate(SDValue N);
510};
511
512class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
513public:
514 static char ID;
515 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
516 CodeGenOptLevel OptLevel)
517 : SelectionDAGISelLegacy(
518 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
519};
520} // end anonymous namespace
521
522char AArch64DAGToDAGISelLegacy::ID = 0;
523
524INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
525
526/// isIntImmediate - This method tests to see if the node is a constant
527/// operand. If so, Imm will receive the value.
528static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
529 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
530 Imm = C->getZExtValue();
531 return true;
532 }
533 return false;
534}
535
536// isIntImmediate - This method tests to see if the operand is a constant.
537// If so, Imm will receive the value.
538static bool isIntImmediate(SDValue N, uint64_t &Imm) {
539 return isIntImmediate(N.getNode(), Imm);
540}
541
542// isOpcWithIntImmediate - This method tests to see if the node is a specific
543// opcode and that it has an immediate integer right operand.
544// If so, Imm will receive the immediate value.
545static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
546 uint64_t &Imm) {
547 return N->getOpcode() == Opc &&
548 isIntImmediate(N->getOperand(1).getNode(), Imm);
549}
550
551// isIntImmediateEq - This method tests to see if N is a constant operand that
552// is equivalent to 'ImmExpected'.
553#ifndef NDEBUG
554static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
555 uint64_t Imm;
556 if (!isIntImmediate(N.getNode(), Imm))
557 return false;
558 return Imm == ImmExpected;
559}
560#endif
561
562bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
563 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
564 std::vector<SDValue> &OutOps) {
565 switch(ConstraintID) {
566 default:
567 llvm_unreachable("Unexpected asm memory constraint");
568 case InlineAsm::ConstraintCode::m:
569 case InlineAsm::ConstraintCode::o:
570 case InlineAsm::ConstraintCode::Q:
571 // We need to make sure that this one operand does not end up in XZR, thus
572 // require the address to be in a PointerRegClass register.
573 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
574 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
575 SDLoc dl(Op);
576 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
577 SDValue NewOp =
578 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
579 dl, Op.getValueType(),
580 Op, RC), 0);
581 OutOps.push_back(NewOp);
582 return false;
583 }
584 return true;
585}
586
587/// SelectArithImmed - Select an immediate value that can be represented as
588/// a 12-bit value shifted left by either 0 or 12. If so, return true with
589/// Val set to the 12-bit value and Shift set to the shifter operand.
590bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
591 SDValue &Shift) {
592 // This function is called from the addsub_shifted_imm ComplexPattern,
593 // which lists [imm] as the list of opcodes it's interested in; however,
594 // we still need to check whether the operand is actually an immediate
595 // here because the ComplexPattern opcode list is only used in
596 // root-level opcode matching.
597 if (!isa<ConstantSDNode>(N.getNode()))
598 return false;
599
600 uint64_t Immed = N.getNode()->getAsZExtVal();
601 unsigned ShiftAmt;
602
603 if (Immed >> 12 == 0) {
604 ShiftAmt = 0;
605 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
606 ShiftAmt = 12;
607 Immed = Immed >> 12;
608 } else
609 return false;
610
611 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
612 SDLoc dl(N);
613 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
614 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
615 return true;
616}
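// For instance, tracing the checks above: 0x123 selects as (Val=0x123,
// Shift=LSL #0), 0x123000 selects as (Val=0x123, Shift=LSL #12), and
// 0x123001 is rejected because it neither fits in 12 bits nor has its low
// 12 bits clear.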
617
618/// SelectNegArithImmed - As above, but negates the value before trying to
619/// select it.
620bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
621 SDValue &Shift) {
622 // This function is called from the addsub_shifted_imm ComplexPattern,
623 // which lists [imm] as the list of opcodes it's interested in; however,
624 // we still need to check whether the operand is actually an immediate
625 // here because the ComplexPattern opcode list is only used in
626 // root-level opcode matching.
627 if (!isa<ConstantSDNode>(N.getNode()))
628 return false;
629
630 // The immediate operand must be a 24-bit zero-extended immediate.
631 uint64_t Immed = N.getNode()->getAsZExtVal();
632
633 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
634 // have the opposite effect on the C flag, so this pattern mustn't match under
635 // those circumstances.
636 if (Immed == 0)
637 return false;
638
639 if (N.getValueType() == MVT::i32)
640 Immed = ~((uint32_t)Immed) + 1;
641 else
642 Immed = ~Immed + 1ULL;
643 if (Immed & 0xFFFFFFFFFF000000ULL)
644 return false;
645
646 Immed &= 0xFFFFFFULL;
647 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
648 Shift);
649}
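// Worked example of the negation path: an i32 operand of -5 (0xFFFFFFFB) is
// negated to 5, which SelectArithImmed accepts as (Val=5, Shift=LSL #0). An
// operand of 0 is rejected above so that "cmp wN, #0" and "cmn wN, #0" keep
// their distinct C-flag behaviour.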
650
651/// getShiftTypeForNode - Translate a shift node to the corresponding
652/// ShiftType value.
653static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
654 switch (N.getOpcode()) {
655 default:
656 return AArch64_AM::InvalidShiftExtend;
657 case ISD::SHL:
658 return AArch64_AM::LSL;
659 case ISD::SRL:
660 return AArch64_AM::LSR;
661 case ISD::SRA:
662 return AArch64_AM::ASR;
663 case ISD::ROTR:
664 return AArch64_AM::ROR;
665 }
666}
667
668/// Determine whether it is worth it to fold SHL into the addressing
669/// mode.
670static bool isWorthFoldingSHL(SDValue V) {
671 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
672 // It is worth folding a logical shift of up to three places.
673 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
674 if (!CSD)
675 return false;
676 unsigned ShiftVal = CSD->getZExtValue();
677 if (ShiftVal > 3)
678 return false;
679
680 // Check if this particular node is reused in any non-memory related
681 // operation. If yes, do not try to fold this node into the address
682 // computation, since the computation will be kept.
683 const SDNode *Node = V.getNode();
684 for (SDNode *UI : Node->users())
685 if (!isa<MemSDNode>(*UI))
686 for (SDNode *UII : UI->users())
687 if (!isa<MemSDNode>(*UII))
688 return false;
689 return true;
690}
691
692/// Determine whether it is worth it to fold V into an extended register addressing
693/// mode.
694bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
695 // Trivial if we are optimizing for code size or if there is only
696 // one use of the value.
697 if (CurDAG->shouldOptForSize() || V.hasOneUse())
698 return true;
699
700 // If a subtarget has a slow shift, folding a shift into multiple loads
701 // costs additional micro-ops.
702 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
703 return false;
704
705 // Check whether we're going to emit the address arithmetic anyway because
706 // it's used by a non-address operation.
707 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
708 return true;
709 if (V.getOpcode() == ISD::ADD) {
710 const SDValue LHS = V.getOperand(0);
711 const SDValue RHS = V.getOperand(1);
712 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
713 return true;
714 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
715 return true;
716 }
717
718 // It hurts otherwise, since the value will be reused.
719 return false;
720}
721
722/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
723/// in order to select more shifted-register operands.
724bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
725 SDValue &Shift) {
726 EVT VT = N.getValueType();
727 if (VT != MVT::i32 && VT != MVT::i64)
728 return false;
729
730 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
731 return false;
732 SDValue LHS = N.getOperand(0);
733 if (!LHS->hasOneUse())
734 return false;
735
736 unsigned LHSOpcode = LHS->getOpcode();
737 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
738 return false;
739
740 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
741 if (!ShiftAmtNode)
742 return false;
743
744 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
745 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
746 if (!RHSC)
747 return false;
748
749 APInt AndMask = RHSC->getAPIntValue();
750 unsigned LowZBits, MaskLen;
751 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
752 return false;
753
754 unsigned BitWidth = N.getValueSizeInBits();
755 SDLoc DL(LHS);
756 uint64_t NewShiftC;
757 unsigned NewShiftOp;
758 if (LHSOpcode == ISD::SHL) {
759 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
760 // BitWidth != LowZBits + MaskLen doesn't match the pattern
761 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
762 return false;
763
764 NewShiftC = LowZBits - ShiftAmtC;
765 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
766 } else {
767 if (LowZBits == 0)
768 return false;
769
770 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
771 NewShiftC = LowZBits + ShiftAmtC;
772 if (NewShiftC >= BitWidth)
773 return false;
774
775 // SRA needs all high bits
776 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
777 return false;
778
779 // SRL high bits can be 0 or 1
780 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
781 return false;
782
783 if (LHSOpcode == ISD::SRL)
784 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
785 else
786 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
787 }
788
789 assert(NewShiftC < BitWidth && "Invalid shift amount");
790 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
791 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
792 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
793 NewShiftAmt, BitWidthMinus1),
794 0);
795 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
796 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
797 return true;
798}
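// Worked example of the SRL path: for i32 (and (srl x, 4), 0x0ffffff0),
// LowZBits=4 and MaskLen=24, so NewShiftC=8 and the node is rewritten as
// Reg = (UBFMWri x, 8, 31), i.e. "lsr #8", paired with Shift = LSL #4, which
// the shifted-register forms of ADD/SUB and the logical instructions consume.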
799
800/// getExtendTypeForNode - Translate an extend node to the corresponding
801/// ExtendType value.
802static AArch64_AM::ShiftExtendType
803getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
804 if (N.getOpcode() == ISD::SIGN_EXTEND ||
805 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
806 EVT SrcVT;
807 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
808 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
809 else
810 SrcVT = N.getOperand(0).getValueType();
811
812 if (!IsLoadStore && SrcVT == MVT::i8)
813 return AArch64_AM::SXTB;
814 else if (!IsLoadStore && SrcVT == MVT::i16)
815 return AArch64_AM::SXTH;
816 else if (SrcVT == MVT::i32)
817 return AArch64_AM::SXTW;
818 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
819
820 return AArch64_AM::InvalidShiftExtend;
821 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
822 N.getOpcode() == ISD::ANY_EXTEND) {
823 EVT SrcVT = N.getOperand(0).getValueType();
824 if (!IsLoadStore && SrcVT == MVT::i8)
825 return AArch64_AM::UXTB;
826 else if (!IsLoadStore && SrcVT == MVT::i16)
827 return AArch64_AM::UXTH;
828 else if (SrcVT == MVT::i32)
829 return AArch64_AM::UXTW;
830 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
831
832 return AArch64_AM::InvalidShiftExtend;
833 } else if (N.getOpcode() == ISD::AND) {
834 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
835 if (!CSD)
836 return AArch64_AM::InvalidShiftExtend;
837 uint64_t AndMask = CSD->getZExtValue();
838
839 switch (AndMask) {
840 default:
841 return AArch64_AM::InvalidShiftExtend;
842 case 0xFF:
843 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
844 case 0xFFFF:
845 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
846 case 0xFFFFFFFF:
847 return AArch64_AM::UXTW;
848 }
849 }
850
851 return AArch64_AM::InvalidShiftExtend;
852}
853
854/// Determine whether it is worth it to fold V into an extended register of an
855/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
856/// instruction, and the shift should be treated as worth folding even if it
857/// has multiple uses.
858bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
859 // Trivial if we are optimizing for code size or if there is only
860 // one use of the value.
861 if (CurDAG->shouldOptForSize() || V.hasOneUse())
862 return true;
863
864 // If a subtarget has a fastpath LSL we can fold a logical shift into
865 // the add/sub and save a cycle.
866 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
867 V.getConstantOperandVal(1) <= 4 &&
869 return true;
870
871 // It hurts otherwise, since the value will be reused.
872 return false;
873}
874
875/// SelectShiftedRegister - Select a "shifted register" operand. If the value
876/// is not shifted, set the Shift operand to default of "LSL 0". The logical
877/// instructions allow the shifted register to be rotated, but the arithmetic
878/// instructions do not. The AllowROR parameter specifies whether ROR is
879/// supported.
880bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
881 SDValue &Reg, SDValue &Shift) {
882 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
883 return true;
884
885 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
886 if (ShType == AArch64_AM::InvalidShiftExtend)
887 return false;
888 if (!AllowROR && ShType == AArch64_AM::ROR)
889 return false;
890
891 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
892 unsigned BitSize = N.getValueSizeInBits();
893 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
894 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
895
896 Reg = N.getOperand(0);
897 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
898 return isWorthFoldingALU(N, true);
899 }
900
901 return false;
902}
903
904/// Instructions that accept extend modifiers like UXTW expect the register
905/// being extended to be a GPR32, but the incoming DAG might be acting on a
906/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
907/// this is the case.
908static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
909 if (N.getValueType() == MVT::i32)
910 return N;
911
912 SDLoc dl(N);
913 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
914}
915
916// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
917template<signed Low, signed High, signed Scale>
918bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
919 if (!isa<ConstantSDNode>(N))
920 return false;
921
922 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
923 if ((MulImm % std::abs(Scale)) == 0) {
924 int64_t RDVLImm = MulImm / Scale;
925 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
926 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
927 return true;
928 }
929 }
930
931 return false;
932}
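// Worked example, with illustrative template arguments Low=-32, High=31,
// Scale=16: a constant of -64 divides evenly by 16 and -64/16 = -4 lies in
// [-32, 31], so a signed target constant of -4 is produced; a constant of 40
// is rejected because it is not a multiple of 16.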
933
934/// SelectArithExtendedRegister - Select a "extended register" operand. This
935/// operand folds in an extend followed by an optional left shift.
936bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
937 SDValue &Shift) {
938 unsigned ShiftVal = 0;
939 AArch64_AM::ShiftExtendType Ext;
940
941 if (N.getOpcode() == ISD::SHL) {
942 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
943 if (!CSD)
944 return false;
945 ShiftVal = CSD->getZExtValue();
946 if (ShiftVal > 4)
947 return false;
948
949 Ext = getExtendTypeForNode(N.getOperand(0));
950 if (Ext == AArch64_AM::InvalidShiftExtend)
951 return false;
952
953 Reg = N.getOperand(0).getOperand(0);
954 } else {
955 Ext = getExtendTypeForNode(N);
956 if (Ext == AArch64_AM::InvalidShiftExtend)
957 return false;
958
959 Reg = N.getOperand(0);
960
961 // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
962 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
963 auto isDef32 = [](SDValue N) {
964 unsigned Opc = N.getOpcode();
965 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
966 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
967 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
968 Opc != ISD::FREEZE;
969 };
970 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
971 isDef32(Reg))
972 return false;
973 }
974
975 // AArch64 mandates that the RHS of the operation must use the smallest
976 // register class that could contain the size being extended from. Thus,
977 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
978 // there might not be an actual 32-bit value in the program. We can
979 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
980 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
981 Reg = narrowIfNeeded(CurDAG, Reg);
982 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
983 MVT::i32);
984 return isWorthFoldingALU(N);
985}
986
987/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
988/// operand is referred to by instructions that have an SP operand.
989bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
990 SDValue &Shift) {
991 unsigned ShiftVal = 0;
992 AArch64_AM::ShiftExtendType Ext;
993
994 if (N.getOpcode() != ISD::SHL)
995 return false;
996
997 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
998 if (!CSD)
999 return false;
1000 ShiftVal = CSD->getZExtValue();
1001 if (ShiftVal > 4)
1002 return false;
1003
1004 Ext = AArch64_AM::UXTX;
1005 Reg = N.getOperand(0);
1006 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1007 MVT::i32);
1008 return isWorthFoldingALU(N);
1009}
1010
1011/// If there's a use of this ADDlow that's not itself a load/store then we'll
1012/// need to create a real ADD instruction from it anyway and there's no point in
1013/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1014/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1015/// leads to duplicated ADRP instructions.
1016static bool isWorthFoldingADDlow(SDValue N) {
1017 for (auto *User : N->users()) {
1018 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1019 User->getOpcode() != ISD::ATOMIC_LOAD &&
1020 User->getOpcode() != ISD::ATOMIC_STORE)
1021 return false;
1022
1023 // ldar and stlr have much more restrictive addressing modes (just a
1024 // register).
1025 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1026 return false;
1027 }
1028
1029 return true;
1030}
1031
1032/// Check if the immediate offset is valid as a scaled immediate.
1033static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1034 unsigned Size) {
1035 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1036 Offset < (Range << Log2_32(Size)))
1037 return true;
1038 return false;
1039}
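// For example, with Range=0x1000 and Size=8 (a 64-bit load/store): Offset=520
// is 8-byte aligned, non-negative and below 0x1000*8, so it is valid and is
// later encoded as the scaled immediate 65; Offset=4 or Offset=-8 would be
// rejected and left to the unscaled (LDUR/STUR) addressing path instead.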
1040
1041/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1042/// immediate" address. The "Size" argument is the size in bytes of the memory
1043/// reference, which determines the scale.
1044bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1045 unsigned BW, unsigned Size,
1046 SDValue &Base,
1047 SDValue &OffImm) {
1048 SDLoc dl(N);
1049 const DataLayout &DL = CurDAG->getDataLayout();
1050 const TargetLowering *TLI = getTargetLowering();
1051 if (N.getOpcode() == ISD::FrameIndex) {
1052 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1053 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1054 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1055 return true;
1056 }
1057
1058 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1059 // mode selected here doesn't support labels/immediates, only base+offset.
1060 if (CurDAG->isBaseWithConstantOffset(N)) {
1061 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1062 if (IsSignedImm) {
1063 int64_t RHSC = RHS->getSExtValue();
1064 unsigned Scale = Log2_32(Size);
1065 int64_t Range = 0x1LL << (BW - 1);
1066
1067 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1068 RHSC < (Range << Scale)) {
1069 Base = N.getOperand(0);
1070 if (Base.getOpcode() == ISD::FrameIndex) {
1071 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1072 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1073 }
1074 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1075 return true;
1076 }
1077 } else {
1078 // unsigned Immediate
1079 uint64_t RHSC = RHS->getZExtValue();
1080 unsigned Scale = Log2_32(Size);
1081 uint64_t Range = 0x1ULL << BW;
1082
1083 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1084 Base = N.getOperand(0);
1085 if (Base.getOpcode() == ISD::FrameIndex) {
1086 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1087 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1088 }
1089 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1090 return true;
1091 }
1092 }
1093 }
1094 }
1095 // Base only. The address will be materialized into a register before
1096 // the memory is accessed.
1097 // add x0, Xbase, #offset
1098 // stp x1, x2, [x0]
1099 Base = N;
1100 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1101 return true;
1102}
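// Worked example of the signed path: with BW=7 and Size=8 (e.g. via
// SelectAddrModeIndexed7S64), valid offsets are multiples of 8 in
// [-512, 504]; (add x, 40) yields Base=x and OffImm=5, while an offset of 44
// falls through to the base-only case at the end of the function.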
1103
1104/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1105/// immediate" address. The "Size" argument is the size in bytes of the memory
1106/// reference, which determines the scale.
1107bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1108 SDValue &Base, SDValue &OffImm) {
1109 SDLoc dl(N);
1110 const DataLayout &DL = CurDAG->getDataLayout();
1111 const TargetLowering *TLI = getTargetLowering();
1112 if (N.getOpcode() == ISD::FrameIndex) {
1113 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1114 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1115 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1116 return true;
1117 }
1118
1119 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1120 GlobalAddressSDNode *GAN =
1121 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1122 Base = N.getOperand(0);
1123 OffImm = N.getOperand(1);
1124 if (!GAN)
1125 return true;
1126
1127 if (GAN->getOffset() % Size == 0 &&
1128 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1129 return true;
1130 }
1131
1132 if (CurDAG->isBaseWithConstantOffset(N)) {
1133 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1134 int64_t RHSC = (int64_t)RHS->getZExtValue();
1135 unsigned Scale = Log2_32(Size);
1136 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1137 Base = N.getOperand(0);
1138 if (Base.getOpcode() == ISD::FrameIndex) {
1139 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1140 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1141 }
1142 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1143 return true;
1144 }
1145 }
1146 }
1147
1148 // Before falling back to our general case, check if the unscaled
1149 // instructions can handle this. If so, that's preferable.
1150 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1151 return false;
1152
1153 // Base only. The address will be materialized into a register before
1154 // the memory is accessed.
1155 // add x0, Xbase, #offset
1156 // ldr x0, [x0]
1157 Base = N;
1158 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1159 return true;
1160}
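// For instance, with Size=8: (add x, 48) produces Base=x and OffImm=6
// (48 >> 3), i.e. "ldr xN, [x, #48]"; (add x, 4) is not 8-byte aligned, so
// SelectAddrModeUnscaled claims it and this function deliberately returns
// false; anything else falls back to the base-only form at the end.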
1161
1162/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1163/// immediate" address. This should only match when there is an offset that
1164/// is not valid for a scaled immediate addressing mode. The "Size" argument
1165/// is the size in bytes of the memory reference, which is needed here to know
1166/// what is valid for a scaled immediate.
1167bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1168 SDValue &Base,
1169 SDValue &OffImm) {
1170 if (!CurDAG->isBaseWithConstantOffset(N))
1171 return false;
1172 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1173 int64_t RHSC = RHS->getSExtValue();
1174 if (RHSC >= -256 && RHSC < 256) {
1175 Base = N.getOperand(0);
1176 if (Base.getOpcode() == ISD::FrameIndex) {
1177 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1178 const TargetLowering *TLI = getTargetLowering();
1179 Base = CurDAG->getTargetFrameIndex(
1180 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1181 }
1182 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1183 return true;
1184 }
1185 }
1186 return false;
1187}
1188
1189static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1190 SDLoc dl(N);
1191 SDValue ImpDef = SDValue(
1192 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1193 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1194 N);
1195}
1196
1197/// Check if the given SHL node (\p N), can be used to form an
1198/// extended register for an addressing mode.
1199bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1200 bool WantExtend, SDValue &Offset,
1201 SDValue &SignExtend) {
1202 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1203 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1204 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1205 return false;
1206
1207 SDLoc dl(N);
1208 if (WantExtend) {
1209 AArch64_AM::ShiftExtendType Ext =
1210 getExtendTypeForNode(N.getOperand(0), true);
1211 if (Ext == AArch64_AM::InvalidShiftExtend)
1212 return false;
1213
1214 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1215 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1216 MVT::i32);
1217 } else {
1218 Offset = N.getOperand(0);
1219 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1220 }
1221
1222 unsigned LegalShiftVal = Log2_32(Size);
1223 unsigned ShiftVal = CSD->getZExtValue();
1224
1225 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1226 return false;
1227
1228 return isWorthFoldingAddr(N, Size);
1229}
1230
1231bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1232 SDValue &Base, SDValue &Offset,
1233 SDValue &SignExtend,
1234 SDValue &DoShift) {
1235 if (N.getOpcode() != ISD::ADD)
1236 return false;
1237 SDValue LHS = N.getOperand(0);
1238 SDValue RHS = N.getOperand(1);
1239 SDLoc dl(N);
1240
1241 // We don't want to match immediate adds here, because they are better lowered
1242 // to the register-immediate addressing modes.
1243 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1244 return false;
1245
1246 // Check if this particular node is reused in any non-memory related
1247 // operation. If yes, do not try to fold this node into the address
1248 // computation, since the computation will be kept.
1249 const SDNode *Node = N.getNode();
1250 for (SDNode *UI : Node->users()) {
1251 if (!isa<MemSDNode>(*UI))
1252 return false;
1253 }
1254
1255 // Remember if it is worth folding N when it produces extended register.
1256 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1257
1258 // Try to match a shifted extend on the RHS.
1259 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1260 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1261 Base = LHS;
1262 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1263 return true;
1264 }
1265
1266 // Try to match a shifted extend on the LHS.
1267 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1268 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1269 Base = RHS;
1270 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1271 return true;
1272 }
1273
1274 // There was no shift, whatever else we find.
1275 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1276
1277 AArch64_AM::ShiftExtendType Ext;
1278 // Try to match an unshifted extend on the LHS.
1279 if (IsExtendedRegisterWorthFolding &&
1280 (Ext = getExtendTypeForNode(LHS, true)) !=
1281 AArch64_AM::InvalidShiftExtend) {
1282 Base = RHS;
1283 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1284 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1285 MVT::i32);
1286 if (isWorthFoldingAddr(LHS, Size))
1287 return true;
1288 }
1289
1290 // Try to match an unshifted extend on the RHS.
1291 if (IsExtendedRegisterWorthFolding &&
1292 (Ext = getExtendTypeForNode(RHS, true)) !=
1293 AArch64_AM::InvalidShiftExtend) {
1294 Base = LHS;
1295 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1296 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1297 MVT::i32);
1298 if (isWorthFoldingAddr(RHS, Size))
1299 return true;
1300 }
1301
1302 return false;
1303}
1304
1305// Check if the given immediate is preferred by ADD. If an immediate can be
1306// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1307// encoded by one MOVZ, return true.
1308static bool isPreferredADD(int64_t ImmOff) {
1309 // Constant in [0x0, 0xfff] can be encoded in ADD.
1310 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1311 return true;
1312 // Check if it can be encoded in an "ADD LSL #12".
1313 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1314 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1315 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1316 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1317 return false;
1318}
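// For example: 0x456 is preferred (a plain ADD immediate), 0x456000 is
// preferred (an "ADD LSL #12" that no single MOVZ can produce), while
// 0x450000 is not, because "movz xN, #0x45, lsl #16" already materializes it
// in one instruction.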
1319
1320bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1321 SDValue &Base, SDValue &Offset,
1322 SDValue &SignExtend,
1323 SDValue &DoShift) {
1324 if (N.getOpcode() != ISD::ADD)
1325 return false;
1326 SDValue LHS = N.getOperand(0);
1327 SDValue RHS = N.getOperand(1);
1328 SDLoc DL(N);
1329
1330 // Check if this particular node is reused in any non-memory related
1331 // operation. If yes, do not try to fold this node into the address
1332 // computation, since the computation will be kept.
1333 const SDNode *Node = N.getNode();
1334 for (SDNode *UI : Node->users()) {
1335 if (!isa<MemSDNode>(*UI))
1336 return false;
1337 }
1338
1339 // Watch out if RHS is a wide immediate: it cannot be selected into the
1340 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
1341 // either. Instead it will use the [BaseReg + 0] address mode and generate
1342 // instructions like:
1343 // MOV X0, WideImmediate
1344 // ADD X1, BaseReg, X0
1345 // LDR X2, [X1, 0]
1346 // For such situation, using [BaseReg, XReg] addressing mode can save one
1347 // ADD/SUB:
1348 // MOV X0, WideImmediate
1349 // LDR X2, [BaseReg, X0]
1350 if (isa<ConstantSDNode>(RHS)) {
1351 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1352 // Skip immediates that can be selected in the load/store addressing
1353 // mode. Also skip immediates that can be encoded by a single ADD (SUB is
1354 // also checked by using -ImmOff).
1355 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1356 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1357 return false;
1358
1359 SDValue Ops[] = { RHS };
1360 SDNode *MOVI =
1361 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1362 SDValue MOVIV = SDValue(MOVI, 0);
1363 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1364 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1365 }
1366
1367 // Remember if it is worth folding N when it produces extended register.
1368 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1369
1370 // Try to match a shifted extend on the RHS.
1371 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1372 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1373 Base = LHS;
1374 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1375 return true;
1376 }
1377
1378 // Try to match a shifted extend on the LHS.
1379 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1380 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1381 Base = RHS;
1382 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1383 return true;
1384 }
1385
1386 // Match any non-shifted, non-extend, non-immediate add expression.
1387 Base = LHS;
1388 Offset = RHS;
1389 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1390 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1391 // Reg1 + Reg2 is free: no check needed.
1392 return true;
1393}
1394
1395SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1396 static const unsigned RegClassIDs[] = {
1397 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1398 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1399 AArch64::dsub2, AArch64::dsub3};
1400
1401 return createTuple(Regs, RegClassIDs, SubRegs);
1402}
1403
1404SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1405 static const unsigned RegClassIDs[] = {
1406 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1407 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1408 AArch64::qsub2, AArch64::qsub3};
1409
1410 return createTuple(Regs, RegClassIDs, SubRegs);
1411}
1412
1413SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1414 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1415 AArch64::ZPR3RegClassID,
1416 AArch64::ZPR4RegClassID};
1417 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1418 AArch64::zsub2, AArch64::zsub3};
1419
1420 return createTuple(Regs, RegClassIDs, SubRegs);
1421}
1422
1423SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1424 assert(Regs.size() == 2 || Regs.size() == 4);
1425
1426 // The createTuple interface requires 3 RegClassIDs for each possible
1427 // tuple type even though we only have them for ZPR2 and ZPR4.
1428 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1429 AArch64::ZPR4Mul4RegClassID};
1430 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1431 AArch64::zsub2, AArch64::zsub3};
1432 return createTuple(Regs, RegClassIDs, SubRegs);
1433}
1434
1435SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1436 const unsigned RegClassIDs[],
1437 const unsigned SubRegs[]) {
1438 // There's no special register-class for a vector-list of 1 element: it's just
1439 // a vector.
1440 if (Regs.size() == 1)
1441 return Regs[0];
1442
1443 assert(Regs.size() >= 2 && Regs.size() <= 4);
1444
1445 SDLoc DL(Regs[0]);
1446
1447 SmallVector<SDValue, 4> Ops;
1448
1449 // First operand of REG_SEQUENCE is the desired RegClass.
1450 Ops.push_back(
1451 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1452
1453 // Then we get pairs of source & subregister-position for the components.
1454 for (unsigned i = 0; i < Regs.size(); ++i) {
1455 Ops.push_back(Regs[i]);
1456 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1457 }
1458
1459 SDNode *N =
1460 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1461 return SDValue(N, 0);
1462}
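// For a three-register Q tuple, for example, the REG_SEQUENCE built here has
// the operands {QQQRegClassID, V0, qsub0, V1, qsub1, V2, qsub2} and produces
// a single untyped value that vector-list instructions can consume.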
1463
1464void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1465 bool isExt) {
1466 SDLoc dl(N);
1467 EVT VT = N->getValueType(0);
1468
1469 unsigned ExtOff = isExt;
1470
1471 // Form a REG_SEQUENCE to force register allocation.
1472 unsigned Vec0Off = ExtOff + 1;
1473 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1474 N->op_begin() + Vec0Off + NumVecs);
1475 SDValue RegSeq = createQTuple(Regs);
1476
1478 if (isExt)
1479 Ops.push_back(N->getOperand(1));
1480 Ops.push_back(RegSeq);
1481 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1482 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1483}
1484
1485static std::tuple<SDValue, SDValue>
1486extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1487 SDLoc DL(Disc);
1488 SDValue AddrDisc;
1489 SDValue ConstDisc;
1490
1491 // If this is a blend, remember the constant and address discriminators.
1492 // Otherwise, it's either a constant discriminator, or a non-blended
1493 // address discriminator.
1494 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1495 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1496 AddrDisc = Disc->getOperand(1);
1497 ConstDisc = Disc->getOperand(2);
1498 } else {
1499 ConstDisc = Disc;
1500 }
1501
1502 // If the constant discriminator (either the blend RHS, or the entire
1503 // discriminator value) isn't a 16-bit constant, bail out, and let the
1504 // discriminator be computed separately.
1505 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1506 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1507 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1508
1509 // If there's no address discriminator, use XZR directly.
1510 if (!AddrDisc)
1511 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1512
1513 return std::make_tuple(
1514 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1515 AddrDisc);
1516}
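// For example: a discriminator @llvm.ptrauth.blend(%addr, 1234) splits into
// (constant 1234, address %addr); a plain constant 42 becomes (42, XZR); and
// a non-constant, non-blend discriminator %d is returned as (0, %d) so that
// it is computed separately.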
1517
1518void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1519 SDLoc DL(N);
1520 // IntrinsicID is operand #0
1521 SDValue Val = N->getOperand(1);
1522 SDValue AUTKey = N->getOperand(2);
1523 SDValue AUTDisc = N->getOperand(3);
1524
1525 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1526 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1527
1528 SDValue AUTAddrDisc, AUTConstDisc;
1529 std::tie(AUTConstDisc, AUTAddrDisc) =
1530 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1531
1532 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1533 AArch64::X16, Val, SDValue());
1534 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1535
1536 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1537 ReplaceNode(N, AUT);
1538}
1539
1540void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1541 SDLoc DL(N);
1542 // IntrinsicID is operand #0
1543 SDValue Val = N->getOperand(1);
1544 SDValue AUTKey = N->getOperand(2);
1545 SDValue AUTDisc = N->getOperand(3);
1546 SDValue PACKey = N->getOperand(4);
1547 SDValue PACDisc = N->getOperand(5);
1548
1549 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1550 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1551
1552 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1553 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1554
1555 SDValue AUTAddrDisc, AUTConstDisc;
1556 std::tie(AUTConstDisc, AUTAddrDisc) =
1557 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1558
1559 SDValue PACAddrDisc, PACConstDisc;
1560 std::tie(PACConstDisc, PACAddrDisc) =
1561 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1562
1563 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1564 AArch64::X16, Val, SDValue());
1565
1566 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1567 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1568
1569 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1570 ReplaceNode(N, AUTPAC);
1571}
1572
1573bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1574 LoadSDNode *LD = cast<LoadSDNode>(N);
1575 if (LD->isUnindexed())
1576 return false;
1577 EVT VT = LD->getMemoryVT();
1578 EVT DstVT = N->getValueType(0);
1579 ISD::MemIndexedMode AM = LD->getAddressingMode();
1580 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1581
1582 // We're not doing validity checking here. That was done when checking
1583 // if we should mark the load as indexed or not. We're just selecting
1584 // the right instruction.
1585 unsigned Opcode = 0;
1586
1587 ISD::LoadExtType ExtType = LD->getExtensionType();
1588 bool InsertTo64 = false;
1589 if (VT == MVT::i64)
1590 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1591 else if (VT == MVT::i32) {
1592 if (ExtType == ISD::NON_EXTLOAD)
1593 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1594 else if (ExtType == ISD::SEXTLOAD)
1595 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1596 else {
1597 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1598 InsertTo64 = true;
1599 // The result of the load is only i32. It's the subreg_to_reg that makes
1600 // it into an i64.
1601 DstVT = MVT::i32;
1602 }
1603 } else if (VT == MVT::i16) {
1604 if (ExtType == ISD::SEXTLOAD) {
1605 if (DstVT == MVT::i64)
1606 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1607 else
1608 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1609 } else {
1610 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1611 InsertTo64 = DstVT == MVT::i64;
1612 // The result of the load is only i32. It's the subreg_to_reg that makes
1613 // it into an i64.
1614 DstVT = MVT::i32;
1615 }
1616 } else if (VT == MVT::i8) {
1617 if (ExtType == ISD::SEXTLOAD) {
1618 if (DstVT == MVT::i64)
1619 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1620 else
1621 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1622 } else {
1623 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1624 InsertTo64 = DstVT == MVT::i64;
1625 // The result of the load is only i32. It's the subreg_to_reg that makes
1626 // it into an i64.
1627 DstVT = MVT::i32;
1628 }
1629 } else if (VT == MVT::f16) {
1630 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1631 } else if (VT == MVT::bf16) {
1632 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1633 } else if (VT == MVT::f32) {
1634 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1635 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1636 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1637 } else if (VT.is128BitVector()) {
1638 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1639 } else
1640 return false;
1641 SDValue Chain = LD->getChain();
1642 SDValue Base = LD->getBasePtr();
1643 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1644 int OffsetVal = (int)OffsetOp->getZExtValue();
1645 SDLoc dl(N);
1646 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1647 SDValue Ops[] = { Base, Offset, Chain };
1648 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1649 MVT::Other, Ops);
1650
1651 // Transfer memoperands.
1652 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1653 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1654
1655 // Either way, we're replacing the node, so tell the caller that.
1656 SDValue LoadedVal = SDValue(Res, 1);
1657 if (InsertTo64) {
1658 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1659 LoadedVal =
1660 SDValue(CurDAG->getMachineNode(
1661 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1662 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1663 SubReg),
1664 0);
1665 }
1666
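 // The machine node's results are (writeback base, loaded value, chain),
 // whereas the indexed LoadSDNode produces (loaded value, writeback base,
 // chain), hence the permuted ReplaceUses below.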
1667 ReplaceUses(SDValue(N, 0), LoadedVal);
1668 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1669 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1670 CurDAG->RemoveDeadNode(N);
1671 return true;
1672}
1673
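// Multi-vector loads are selected to a single machine node returning an
// untyped super-register; the individual result vectors are then extracted
// with consecutive sub-register indices starting at SubRegIdx.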
1674void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1675 unsigned SubRegIdx) {
1676 SDLoc dl(N);
1677 EVT VT = N->getValueType(0);
1678 SDValue Chain = N->getOperand(0);
1679
1680 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1681 Chain};
1682
1683 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1684
1685 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1686 SDValue SuperReg = SDValue(Ld, 0);
1687 for (unsigned i = 0; i < NumVecs; ++i)
1688 ReplaceUses(SDValue(N, i),
1689 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1690
1691 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1692
1693 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1694 // because it's too simple to have needed special treatment during lowering.
1695 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1696 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1697 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1698 }
1699
1700 CurDAG->RemoveDeadNode(N);
1701}
1702
1703void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1704 unsigned Opc, unsigned SubRegIdx) {
1705 SDLoc dl(N);
1706 EVT VT = N->getValueType(0);
1707 SDValue Chain = N->getOperand(0);
1708
1709 SDValue Ops[] = {N->getOperand(1), // Mem operand
1710 N->getOperand(2), // Incremental
1711 Chain};
1712
1713 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1714 MVT::Untyped, MVT::Other};
1715
1716 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1717
1718 // Update uses of write back register
1719 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1720
1721 // Update uses of vector list
1722 SDValue SuperReg = SDValue(Ld, 1);
1723 if (NumVecs == 1)
1724 ReplaceUses(SDValue(N, 0), SuperReg);
1725 else
1726 for (unsigned i = 0; i < NumVecs; ++i)
1727 ReplaceUses(SDValue(N, i),
1728 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1729
1730 // Update the chain
1731 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1732 CurDAG->RemoveDeadNode(N);
1733}
1734
1735/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1736/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1737/// new Base and an SDValue representing the new offset.
1738std::tuple<unsigned, SDValue, SDValue>
1739AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1740 unsigned Opc_ri,
1741 const SDValue &OldBase,
1742 const SDValue &OldOffset,
1743 unsigned Scale) {
1744 SDValue NewBase = OldBase;
1745 SDValue NewOffset = OldOffset;
1746 // Detect a possible Reg+Imm addressing mode.
1747 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1748 N, OldBase, NewBase, NewOffset);
1749
1750 // Detect a possible reg+reg addressing mode, but only if we haven't already
1751 // detected a Reg+Imm one.
1752 const bool IsRegReg =
1753 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1754
1755 // Select the instruction.
1756 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1757}
1758
1759enum class SelectTypeKind {
1760 Int1 = 0,
1761 Int = 1,
1762 FP = 2,
1763 AnyType = 3,
1764};
1765
1766/// This function selects an opcode from a list of opcodes, which is
1767/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1768/// element types, in this order.
1769template <SelectTypeKind Kind>
1770static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1771 // Only match scalable vector VTs
1772 if (!VT.isScalableVector())
1773 return 0;
1774
1775 EVT EltVT = VT.getVectorElementType();
1776 unsigned Key = VT.getVectorMinNumElements();
1777 switch (Kind) {
1778 case SelectTypeKind::AnyType:
1779 break;
1780 case SelectTypeKind::Int:
1781 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1782 EltVT != MVT::i64)
1783 return 0;
1784 break;
1785 case SelectTypeKind::Int1:
1786 if (EltVT != MVT::i1)
1787 return 0;
1788 break;
1789 case SelectTypeKind::FP:
1790 if (EltVT == MVT::bf16)
1791 Key = 16;
1792 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1793 EltVT != MVT::f64)
1794 return 0;
1795 break;
1796 }
1797
1798 unsigned Offset;
1799 switch (Key) {
1800 case 16: // 8-bit or bf16
1801 Offset = 0;
1802 break;
1803 case 8: // 16-bit
1804 Offset = 1;
1805 break;
1806 case 4: // 32-bit
1807 Offset = 2;
1808 break;
1809 case 2: // 64-bit
1810 Offset = 3;
1811 break;
1812 default:
1813 return 0;
1814 }
1815
1816 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1817}
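// For example, nxv16i8 maps to Opcodes[0], nxv8i16 to Opcodes[1], nxv4i32 to
// Opcodes[2] and nxv2i64 to Opcodes[3]; scalable bf16 vectors are forced into
// slot 0 via Key = 16.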
1818
1819// This function is almost identical to SelectWhilePair, but has an
1820// extra check on the range of the immediate operand.
1821// TODO: Merge these two functions together at some point?
1822void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1823 // Immediate can be either 0 or 1.
1824 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1825 if (Imm->getZExtValue() > 1)
1826 return;
1827
1828 SDLoc DL(N);
1829 EVT VT = N->getValueType(0);
1830 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1831 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1832 SDValue SuperReg = SDValue(WhilePair, 0);
1833
1834 for (unsigned I = 0; I < 2; ++I)
1835 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1836 AArch64::psub0 + I, DL, VT, SuperReg));
1837
1838 CurDAG->RemoveDeadNode(N);
1839}
1840
1841void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1842 SDLoc DL(N);
1843 EVT VT = N->getValueType(0);
1844
1845 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1846
1847 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1848 SDValue SuperReg = SDValue(WhilePair, 0);
1849
1850 for (unsigned I = 0; I < 2; ++I)
1851 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1852 AArch64::psub0 + I, DL, VT, SuperReg));
1853
1854 CurDAG->RemoveDeadNode(N);
1855}
1856
1857void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1858 unsigned Opcode) {
1859 EVT VT = N->getValueType(0);
1860 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1861 SDValue Ops = createZTuple(Regs);
1862 SDLoc DL(N);
1863 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1864 SDValue SuperReg = SDValue(Intrinsic, 0);
1865 for (unsigned i = 0; i < NumVecs; ++i)
1866 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1867 AArch64::zsub0 + i, DL, VT, SuperReg));
1868
1869 CurDAG->RemoveDeadNode(N);
1870}
1871
1872void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1873 unsigned Opcode) {
1874 SDLoc DL(N);
1875 EVT VT = N->getValueType(0);
1876 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1877 Ops.push_back(/*Chain*/ N->getOperand(0));
1878
1879 SDNode *Instruction =
1880 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1881 SDValue SuperReg = SDValue(Instruction, 0);
1882
1883 for (unsigned i = 0; i < NumVecs; ++i)
1884 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1885 AArch64::zsub0 + i, DL, VT, SuperReg));
1886
1887 // Copy chain
1888 unsigned ChainIdx = NumVecs;
1889 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1890 CurDAG->RemoveDeadNode(N);
1891}
1892
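// "Destructive" here means the first (multi-)vector operand, Zdn, doubles as
// the destination register tuple; Zm is either a matching multi-vector tuple
// or a single vector, depending on IsZmMulti.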
1893void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1894 unsigned NumVecs,
1895 bool IsZmMulti,
1896 unsigned Opcode,
1897 bool HasPred) {
1898 assert(Opcode != 0 && "Unexpected opcode");
1899
1900 SDLoc DL(N);
1901 EVT VT = N->getValueType(0);
1902 unsigned FirstVecIdx = HasPred ? 2 : 1;
1903
1904 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1905 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1906 return createZMulTuple(Regs);
1907 };
1908
1909 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1910
1911 SDValue Zm;
1912 if (IsZmMulti)
1913 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1914 else
1915 Zm = N->getOperand(NumVecs + FirstVecIdx);
1916
1917 SDNode *Intrinsic;
1918 if (HasPred)
1919 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1920 N->getOperand(1), Zdn, Zm);
1921 else
1922 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1923 SDValue SuperReg = SDValue(Intrinsic, 0);
1924 for (unsigned i = 0; i < NumVecs; ++i)
1925 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1926 AArch64::zsub0 + i, DL, VT, SuperReg));
1927
1928 CurDAG->RemoveDeadNode(N);
1929}
1930
1931void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1932 unsigned Scale, unsigned Opc_ri,
1933 unsigned Opc_rr, bool IsIntr) {
1934 assert(Scale < 5 && "Invalid scaling value.");
1935 SDLoc DL(N);
1936 EVT VT = N->getValueType(0);
1937 SDValue Chain = N->getOperand(0);
1938
1939 // Optimize addressing mode.
1940 SDValue Base, Offset;
1941 unsigned Opc;
1942 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1943 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1944 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1945
1946 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1947 Base, // Memory operand
1948 Offset, Chain};
1949
1950 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1951
1952 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1953 SDValue SuperReg = SDValue(Load, 0);
1954 for (unsigned i = 0; i < NumVecs; ++i)
1955 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1956 AArch64::zsub0 + i, DL, VT, SuperReg));
1957
1958 // Copy chain
1959 unsigned ChainIdx = NumVecs;
1960 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1961 CurDAG->RemoveDeadNode(N);
1962}
1963
1964void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1965 unsigned NumVecs,
1966 unsigned Scale,
1967 unsigned Opc_ri,
1968 unsigned Opc_rr) {
1969 assert(Scale < 4 && "Invalid scaling value.");
1970 SDLoc DL(N);
1971 EVT VT = N->getValueType(0);
1972 SDValue Chain = N->getOperand(0);
1973
1974 SDValue PNg = N->getOperand(2);
1975 SDValue Base = N->getOperand(3);
1976 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1977 unsigned Opc;
1978 std::tie(Opc, Base, Offset) =
1979 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1980
1981 SDValue Ops[] = {PNg, // Predicate-as-counter
1982 Base, // Memory operand
1983 Offset, Chain};
1984
1985 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1986
1987 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1988 SDValue SuperReg = SDValue(Load, 0);
1989 for (unsigned i = 0; i < NumVecs; ++i)
1990 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1991 AArch64::zsub0 + i, DL, VT, SuperReg));
1992
1993 // Copy chain
1994 unsigned ChainIdx = NumVecs;
1995 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1996 CurDAG->RemoveDeadNode(N);
1997}
1998
1999void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2000 unsigned Opcode) {
2001 if (N->getValueType(0) != MVT::nxv4f32)
2002 return;
2003 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2004}
2005
2006void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2007 unsigned NumOutVecs,
2008 unsigned Opc,
2009 uint32_t MaxImm) {
2010 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2011 if (Imm->getZExtValue() > MaxImm)
2012 return;
2013
2014 SDValue ZtValue;
2015 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2016 return;
2017
2018 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2019 SDLoc DL(Node);
2020 EVT VT = Node->getValueType(0);
2021
2022 SDNode *Instruction =
2023 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2024 SDValue SuperReg = SDValue(Instruction, 0);
2025
2026 for (unsigned I = 0; I < NumOutVecs; ++I)
2027 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2028 AArch64::zsub0 + I, DL, VT, SuperReg));
2029
2030 // Copy chain
2031 unsigned ChainIdx = NumOutVecs;
2032 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2033 CurDAG->RemoveDeadNode(Node);
2034}
2035
2036void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2037 unsigned NumOutVecs,
2038 unsigned Opc) {
2039
2040 SDValue ZtValue;
2041 SmallVector<SDValue, 4> Ops;
2042 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2043 return;
2044
2045 Ops.push_back(ZtValue);
2046 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2047 SDLoc DL(Node);
2048 EVT VT = Node->getValueType(0);
2049
2050 SDNode *Instruction =
2051 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2052 SDValue SuperReg = SDValue(Instruction, 0);
2053
2054 for (unsigned I = 0; I < NumOutVecs; ++I)
2055 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2056 AArch64::zsub0 + I, DL, VT, SuperReg));
2057
2058 // Copy chain
2059 unsigned ChainIdx = NumOutVecs;
2060 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2061 CurDAG->RemoveDeadNode(Node);
2062}
2063
2064void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2065 unsigned Op) {
2066 SDLoc DL(N);
2067 EVT VT = N->getValueType(0);
2068
2069 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2070 SDValue Zd = createZMulTuple(Regs);
2071 SDValue Zn = N->getOperand(1 + NumVecs);
2072 SDValue Zm = N->getOperand(2 + NumVecs);
2073
2074 SDValue Ops[] = {Zd, Zn, Zm};
2075
2076 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2077 SDValue SuperReg = SDValue(Intrinsic, 0);
2078 for (unsigned i = 0; i < NumVecs; ++i)
2079 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2080 AArch64::zsub0 + i, DL, VT, SuperReg));
2081
2082 CurDAG->RemoveDeadNode(N);
2083}
2084
2085bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2086 switch (BaseReg) {
2087 default:
2088 return false;
2089 case AArch64::ZA:
2090 case AArch64::ZAB0:
2091 if (TileNum == 0)
2092 break;
2093 return false;
2094 case AArch64::ZAH0:
2095 if (TileNum <= 1)
2096 break;
2097 return false;
2098 case AArch64::ZAS0:
2099 if (TileNum <= 3)
2100 break;
2101 return false;
2102 case AArch64::ZAD0:
2103 if (TileNum <= 7)
2104 break;
2105 return false;
2106 }
2107
2108 BaseReg += TileNum;
2109 return true;
2110}
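// Note: this relies on the SME tile registers of each element size being
// numbered consecutively (e.g. ZAS0, ZAS1, ...), so the selected tile is
// simply BaseReg + TileNum.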
2111
2112template <unsigned MaxIdx, unsigned Scale>
2113void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2114 unsigned BaseReg, unsigned Op) {
2115 unsigned TileNum = 0;
2116 if (BaseReg != AArch64::ZA)
2117 TileNum = N->getConstantOperandVal(2);
2118
2119 if (!SelectSMETile(BaseReg, TileNum))
2120 return;
2121
2122 SDValue SliceBase, Base, Offset;
2123 if (BaseReg == AArch64::ZA)
2124 SliceBase = N->getOperand(2);
2125 else
2126 SliceBase = N->getOperand(3);
2127
2128 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2129 return;
2130
2131 SDLoc DL(N);
2132 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2133 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2134 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2135
2136 EVT VT = N->getValueType(0);
2137 for (unsigned I = 0; I < NumVecs; ++I)
2138 ReplaceUses(SDValue(N, I),
2139 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2140 SDValue(Mov, 0)));
2141 // Copy chain
2142 unsigned ChainIdx = NumVecs;
2143 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2144 CurDAG->RemoveDeadNode(N);
2145}
2146
2147void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2148 unsigned Op, unsigned MaxIdx,
2149 unsigned Scale, unsigned BaseReg) {
2150 // The slice operand can be in different positions:
2151 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2152 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2153 SDValue SliceBase = N->getOperand(2);
2154 if (BaseReg != AArch64::ZA)
2155 SliceBase = N->getOperand(3);
2156
2157 SDValue Base, Offset;
2158 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2159 return;
2160 // The correct ZA tile number is computed during machine instruction
2161 // emission (see EmitZAInstr); the DAG cannot select a ZA tile as an
2162 // output register with ZReg.
2163 SDLoc DL(N);
2164 SmallVector<SDValue, 4> Ops;
2165 if (BaseReg != AArch64::ZA)
2166 Ops.push_back(N->getOperand(2));
2167 Ops.push_back(Base);
2168 Ops.push_back(Offset);
2169 Ops.push_back(N->getOperand(0)); // Chain
2170 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2171
2172 EVT VT = N->getValueType(0);
2173 for (unsigned I = 0; I < NumVecs; ++I)
2174 ReplaceUses(SDValue(N, I),
2175 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2176 SDValue(Mov, 0)));
2177
2178 // Copy chain
2179 unsigned ChainIdx = NumVecs;
2180 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2181 CurDAG->RemoveDeadNode(N);
2182}
2183
2184void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2185 unsigned NumOutVecs,
2186 bool IsTupleInput,
2187 unsigned Opc) {
2188 SDLoc DL(N);
2189 EVT VT = N->getValueType(0);
2190 unsigned NumInVecs = N->getNumOperands() - 1;
2191
2193 if (IsTupleInput) {
2194 assert((NumInVecs == 2 || NumInVecs == 4) &&
2195 "Don't know how to handle multi-register input!");
2196 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2197 Ops.push_back(createZMulTuple(Regs));
2198 } else {
2199 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2200 for (unsigned I = 0; I < NumInVecs; I++)
2201 Ops.push_back(N->getOperand(1 + I));
2202 }
2203
2204 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2205 SDValue SuperReg = SDValue(Res, 0);
2206
2207 for (unsigned I = 0; I < NumOutVecs; I++)
2208 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2209 AArch64::zsub0 + I, DL, VT, SuperReg));
2210 CurDAG->RemoveDeadNode(N);
2211}
2212
2213void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2214 unsigned Opc) {
2215 SDLoc dl(N);
2216 EVT VT = N->getOperand(2)->getValueType(0);
2217
2218 // Form a REG_SEQUENCE to force register allocation.
2219 bool Is128Bit = VT.getSizeInBits() == 128;
2220 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2221 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2222
2223 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2224 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2225
2226 // Transfer memoperands.
2227 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2228 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2229
2230 ReplaceNode(N, St);
2231}
2232
2233void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2234 unsigned Scale, unsigned Opc_rr,
2235 unsigned Opc_ri) {
2236 SDLoc dl(N);
2237
2238 // Form a REG_SEQUENCE to force register allocation.
2239 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2240 SDValue RegSeq = createZTuple(Regs);
2241
2242 // Optimize addressing mode.
2243 unsigned Opc;
2244 SDValue Base, Offset;
2245 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2246 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2247 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2248
2249 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2250 Base, // address
2251 Offset, // offset
2252 N->getOperand(0)}; // chain
2253 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2254
2255 ReplaceNode(N, St);
2256}
2257
2258bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2259 SDValue &OffImm) {
2260 SDLoc dl(N);
2261 const DataLayout &DL = CurDAG->getDataLayout();
2262 const TargetLowering *TLI = getTargetLowering();
2263
2264 // Try to match it for the frame address
2265 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2266 int FI = FINode->getIndex();
2267 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2268 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2269 return true;
2270 }
2271
2272 return false;
2273}
2274
2275void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2276 unsigned Opc) {
2277 SDLoc dl(N);
2278 EVT VT = N->getOperand(2)->getValueType(0);
2279 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2280 MVT::Other}; // Type for the Chain
2281
2282 // Form a REG_SEQUENCE to force register allocation.
2283 bool Is128Bit = VT.getSizeInBits() == 128;
2284 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2285 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2286
2287 SDValue Ops[] = {RegSeq,
2288 N->getOperand(NumVecs + 1), // base register
2289 N->getOperand(NumVecs + 2), // Incremental
2290 N->getOperand(0)}; // Chain
2291 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2292
2293 ReplaceNode(N, St);
2294}
2295
2296namespace {
2297/// WidenVector - Given a value in the V64 register class, produce the
2298/// equivalent value in the V128 register class.
2299class WidenVector {
2300 SelectionDAG &DAG;
2301
2302public:
2303 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2304
2305 SDValue operator()(SDValue V64Reg) {
2306 EVT VT = V64Reg.getValueType();
2307 unsigned NarrowSize = VT.getVectorNumElements();
2308 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2309 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2310 SDLoc DL(V64Reg);
2311
2312 SDValue Undef =
2313 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2314 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2315 }
2316};
2317} // namespace
2318
2319/// NarrowVector - Given a value in the V128 register class, produce the
2320/// equivalent value in the V64 register class.
2321 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2322 EVT VT = V128Reg.getValueType();
2323 unsigned WideSize = VT.getVectorNumElements();
2324 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2325 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2326
2327 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2328 V128Reg);
2329}
2330
2331void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2332 unsigned Opc) {
2333 SDLoc dl(N);
2334 EVT VT = N->getValueType(0);
2335 bool Narrow = VT.getSizeInBits() == 64;
2336
2337 // Form a REG_SEQUENCE to force register allocation.
2338 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2339
2340 if (Narrow)
2341 transform(Regs, Regs.begin(),
2342 WidenVector(*CurDAG));
2343
2344 SDValue RegSeq = createQTuple(Regs);
2345
2346 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2347
2348 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2349
2350 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2351 N->getOperand(NumVecs + 3), N->getOperand(0)};
2352 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2353 SDValue SuperReg = SDValue(Ld, 0);
2354
2355 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2356 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2357 AArch64::qsub2, AArch64::qsub3 };
2358 for (unsigned i = 0; i < NumVecs; ++i) {
2359 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2360 if (Narrow)
2361 NV = NarrowVector(NV, *CurDAG);
2362 ReplaceUses(SDValue(N, i), NV);
2363 }
2364
2365 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2366 CurDAG->RemoveDeadNode(N);
2367}
2368
2369void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2370 unsigned Opc) {
2371 SDLoc dl(N);
2372 EVT VT = N->getValueType(0);
2373 bool Narrow = VT.getSizeInBits() == 64;
2374
2375 // Form a REG_SEQUENCE to force register allocation.
2376 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2377
2378 if (Narrow)
2379 transform(Regs, Regs.begin(),
2380 WidenVector(*CurDAG));
2381
2382 SDValue RegSeq = createQTuple(Regs);
2383
2384 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2385 RegSeq->getValueType(0), MVT::Other};
2386
2387 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2388
2389 SDValue Ops[] = {RegSeq,
2390 CurDAG->getTargetConstant(LaneNo, dl,
2391 MVT::i64), // Lane Number
2392 N->getOperand(NumVecs + 2), // Base register
2393 N->getOperand(NumVecs + 3), // Incremental
2394 N->getOperand(0)};
2395 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2396
2397 // Update uses of the write back register
2398 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2399
2400 // Update uses of the vector list
2401 SDValue SuperReg = SDValue(Ld, 1);
2402 if (NumVecs == 1) {
2403 ReplaceUses(SDValue(N, 0),
2404 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2405 } else {
2406 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2407 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2408 AArch64::qsub2, AArch64::qsub3 };
2409 for (unsigned i = 0; i < NumVecs; ++i) {
2410 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2411 SuperReg);
2412 if (Narrow)
2413 NV = NarrowVector(NV, *CurDAG);
2414 ReplaceUses(SDValue(N, i), NV);
2415 }
2416 }
2417
2418 // Update the Chain
2419 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2420 CurDAG->RemoveDeadNode(N);
2421}
2422
2423void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2424 unsigned Opc) {
2425 SDLoc dl(N);
2426 EVT VT = N->getOperand(2)->getValueType(0);
2427 bool Narrow = VT.getSizeInBits() == 64;
2428
2429 // Form a REG_SEQUENCE to force register allocation.
2430 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2431
2432 if (Narrow)
2433 transform(Regs, Regs.begin(),
2434 WidenVector(*CurDAG));
2435
2436 SDValue RegSeq = createQTuple(Regs);
2437
2438 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2439
2440 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2441 N->getOperand(NumVecs + 3), N->getOperand(0)};
2442 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2443
2444 // Transfer memoperands.
2445 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2446 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2447
2448 ReplaceNode(N, St);
2449}
2450
2451void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2452 unsigned Opc) {
2453 SDLoc dl(N);
2454 EVT VT = N->getOperand(2)->getValueType(0);
2455 bool Narrow = VT.getSizeInBits() == 64;
2456
2457 // Form a REG_SEQUENCE to force register allocation.
2458 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2459
2460 if (Narrow)
2461 transform(Regs, Regs.begin(),
2462 WidenVector(*CurDAG));
2463
2464 SDValue RegSeq = createQTuple(Regs);
2465
2466 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2467 MVT::Other};
2468
2469 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2470
2471 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2472 N->getOperand(NumVecs + 2), // Base Register
2473 N->getOperand(NumVecs + 3), // Incremental
2474 N->getOperand(0)};
2475 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2476
2477 // Transfer memoperands.
2478 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2479 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2480
2481 ReplaceNode(N, St);
2482}
2483
2484 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2485 unsigned &Opc, SDValue &Opd0,
2486 unsigned &LSB, unsigned &MSB,
2487 unsigned NumberOfIgnoredLowBits,
2488 bool BiggerPattern) {
2489 assert(N->getOpcode() == ISD::AND &&
2490 "N must be a AND operation to call this function");
2491
2492 EVT VT = N->getValueType(0);
2493
2494 // We could test the type of VT here and return false when it does not
2495 // match, but since that check is done prior to this call in the current
2496 // context, we turn it into an assert to avoid redundant code.
2497 assert((VT == MVT::i32 || VT == MVT::i64) &&
2498 "Type checking must have been done before calling this function");
2499
2500 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2501 // changed the AND node to a 32-bit mask operation. We'll have to
2502 // undo that as part of the transform here if we want to catch all
2503 // the opportunities.
2504 // Currently the NumberOfIgnoredLowBits argument helps to recover
2505 // from these situations when matching bigger pattern (bitfield insert).
2506
2507 // For unsigned extracts, check for a shift right and mask
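 // For example, (and (srl x, 3), 0xff) is selected as UBFM x, #3, #10, i.e.
 // an unsigned extract of 8 bits starting at bit 3.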
2508 uint64_t AndImm = 0;
2509 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2510 return false;
2511
2512 const SDNode *Op0 = N->getOperand(0).getNode();
2513
2514 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2515 // simplified. Try to undo that
2516 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2517
2518 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2519 if (AndImm & (AndImm + 1))
2520 return false;
2521
2522 bool ClampMSB = false;
2523 uint64_t SrlImm = 0;
2524 // Handle the SRL + ANY_EXTEND case.
2525 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2526 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2527 // Extend the incoming operand of the SRL to 64-bit.
2528 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2529 // Make sure to clamp the MSB so that we preserve the semantics of the
2530 // original operations.
2531 ClampMSB = true;
2532 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2533 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2534 SrlImm)) {
2535 // If the shift result was truncated, we can still combine them.
2536 Opd0 = Op0->getOperand(0).getOperand(0);
2537
2538 // Use the type of SRL node.
2539 VT = Opd0->getValueType(0);
2540 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2541 Opd0 = Op0->getOperand(0);
2542 ClampMSB = (VT == MVT::i32);
2543 } else if (BiggerPattern) {
2544 // Let's pretend a 0 shift right has been performed.
2545 // The resulting code will be at least as good as the original one
2546 // plus it may expose more opportunities for bitfield insert pattern.
2547 // FIXME: Currently we limit this to the bigger pattern, because
2548 // some optimizations expect AND and not UBFM.
2549 Opd0 = N->getOperand(0);
2550 } else
2551 return false;
2552
2553 // Bail out on large immediates. This happens when no proper
2554 // combining/constant folding was performed.
2555 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2556 LLVM_DEBUG(
2557 (dbgs() << N
2558 << ": Found large shift immediate, this should not happen\n"));
2559 return false;
2560 }
2561
2562 LSB = SrlImm;
2563 MSB = SrlImm +
2564 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2565 : llvm::countr_one<uint64_t>(AndImm)) -
2566 1;
2567 if (ClampMSB)
2568 // Since we're moving the extend before the right shift operation, we need
2569 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2570 // the zeros which would get shifted in with the original right shift
2571 // operation.
2572 MSB = MSB > 31 ? 31 : MSB;
2573
2574 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2575 return true;
2576}
2577
2578static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2579 SDValue &Opd0, unsigned &Immr,
2580 unsigned &Imms) {
2581 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2582
2583 EVT VT = N->getValueType(0);
2584 unsigned BitWidth = VT.getSizeInBits();
2585 assert((VT == MVT::i32 || VT == MVT::i64) &&
2586 "Type checking must have been done before calling this function");
2587
2588 SDValue Op = N->getOperand(0);
2589 if (Op->getOpcode() == ISD::TRUNCATE) {
2590 Op = Op->getOperand(0);
2591 VT = Op->getValueType(0);
2592 BitWidth = VT.getSizeInBits();
2593 }
2594
2595 uint64_t ShiftImm;
2596 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2597 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2598 return false;
2599
2600 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2601 if (ShiftImm + Width > BitWidth)
2602 return false;
2603
2604 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2605 Opd0 = Op.getOperand(0);
2606 Immr = ShiftImm;
2607 Imms = ShiftImm + Width - 1;
2608 return true;
2609}
2610
2611static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2612 SDValue &Opd0, unsigned &LSB,
2613 unsigned &MSB) {
2614 // We are looking for the following pattern, which extracts several
2615 // contiguous bits from the source value and places them at the LSB of the
2616 // destination value; all other bits of the destination value are set to zero:
2617 //
2618 // Value2 = AND Value, MaskImm
2619 // SRL Value2, ShiftImm
2620 //
2621 // where MaskImm >> ShiftImm determines the extracted bit width.
2622 //
2623 // This gets selected into a single UBFM:
2624 //
2625 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2626 //
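 // For example, (srl (and x, 0xff0), 4) becomes UBFM x, #4, #11, which
 // extracts the 8 bits of x starting at bit 4.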
2627
2628 if (N->getOpcode() != ISD::SRL)
2629 return false;
2630
2631 uint64_t AndMask = 0;
2632 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2633 return false;
2634
2635 Opd0 = N->getOperand(0).getOperand(0);
2636
2637 uint64_t SrlImm = 0;
2638 if (!isIntImmediate(N->getOperand(1), SrlImm))
2639 return false;
2640
2641 // Check whether we really have several bits extract here.
2642 if (!isMask_64(AndMask >> SrlImm))
2643 return false;
2644
2645 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2646 LSB = SrlImm;
2647 MSB = llvm::Log2_64(AndMask);
2648 return true;
2649}
2650
2651static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2652 unsigned &Immr, unsigned &Imms,
2653 bool BiggerPattern) {
2654 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2655 "N must be a SHR/SRA operation to call this function");
2656
2657 EVT VT = N->getValueType(0);
2658
2659 // We could test the type of VT here and return false when it does not
2660 // match, but since that check is done prior to this call in the current
2661 // context, we turn it into an assert to avoid redundant code.
2662 assert((VT == MVT::i32 || VT == MVT::i64) &&
2663 "Type checking must have been done before calling this function");
2664
2665 // Check for AND + SRL doing several bits extract.
2666 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2667 return true;
2668
2669 // We're looking for a shift of a shift.
2670 uint64_t ShlImm = 0;
2671 uint64_t TruncBits = 0;
2672 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2673 Opd0 = N->getOperand(0).getOperand(0);
2674 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2675 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2676 // We are looking for a shift of a truncate. Truncating from i64 to i32
2677 // can be treated as setting the high 32 bits to zero. Our strategy here is
2678 // to always generate a 64-bit UBFM. This consistency helps the CSE pass
2679 // later find more redundancy.
2680 Opd0 = N->getOperand(0).getOperand(0);
2681 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2682 VT = Opd0.getValueType();
2683 assert(VT == MVT::i64 && "the promoted type should be i64");
2684 } else if (BiggerPattern) {
2685 // Let's pretend a 0 shift left has been performed.
2686 // FIXME: Currently we limit this to the bigger pattern case,
2687 // because some optimizations expect AND and not UBFM
2688 Opd0 = N->getOperand(0);
2689 } else
2690 return false;
2691
2692 // Missing combines/constant folding may have left us with strange
2693 // constants.
2694 if (ShlImm >= VT.getSizeInBits()) {
2695 LLVM_DEBUG(
2696 (dbgs() << N
2697 << ": Found large shift immediate, this should not happen\n"));
2698 return false;
2699 }
2700
2701 uint64_t SrlImm = 0;
2702 if (!isIntImmediate(N->getOperand(1), SrlImm))
2703 return false;
2704
2705 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2706 "bad amount in shift node!");
2707 int immr = SrlImm - ShlImm;
2708 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2709 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2710 // SRA requires a signed extraction
2711 if (VT == MVT::i32)
2712 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2713 else
2714 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2715 return true;
2716}
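// For example, on i64 (srl (shl x, 4), 8) becomes UBFM x, #4, #59, extracting
// the 56 bits of x starting at bit 4; an SRA root selects SBFM instead.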
2717
2718bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2719 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2720
2721 EVT VT = N->getValueType(0);
2722 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2723 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2724 return false;
2725
2726 uint64_t ShiftImm;
2727 SDValue Op = N->getOperand(0);
2728 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2729 return false;
2730
2731 SDLoc dl(N);
2732 // Extend the incoming operand of the shift to 64-bits.
2733 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2734 unsigned Immr = ShiftImm;
2735 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2736 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2737 CurDAG->getTargetConstant(Imms, dl, VT)};
2738 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2739 return true;
2740}
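// For example, (sext i64 (sra x:i32, 7)) becomes SBFM (widened x), #7, #31,
// sign-extracting bits [31:7] of the widened source.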
2741
2742static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2743 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2744 unsigned NumberOfIgnoredLowBits = 0,
2745 bool BiggerPattern = false) {
2746 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2747 return false;
2748
2749 switch (N->getOpcode()) {
2750 default:
2751 if (!N->isMachineOpcode())
2752 return false;
2753 break;
2754 case ISD::AND:
2755 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2756 NumberOfIgnoredLowBits, BiggerPattern);
2757 case ISD::SRL:
2758 case ISD::SRA:
2759 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2760
2761 case ISD::SIGN_EXTEND_INREG:
2762 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2763 }
2764
2765 unsigned NOpc = N->getMachineOpcode();
2766 switch (NOpc) {
2767 default:
2768 return false;
2769 case AArch64::SBFMWri:
2770 case AArch64::UBFMWri:
2771 case AArch64::SBFMXri:
2772 case AArch64::UBFMXri:
2773 Opc = NOpc;
2774 Opd0 = N->getOperand(0);
2775 Immr = N->getConstantOperandVal(1);
2776 Imms = N->getConstantOperandVal(2);
2777 return true;
2778 }
2779 // Unreachable
2780 return false;
2781}
2782
2783bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2784 unsigned Opc, Immr, Imms;
2785 SDValue Opd0;
2786 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2787 return false;
2788
2789 EVT VT = N->getValueType(0);
2790 SDLoc dl(N);
2791
2792 // If the bit extract operation is 64bit but the original type is 32bit, we
2793 // need to add one EXTRACT_SUBREG.
2794 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2795 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2796 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2797
2798 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2799 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2800 MVT::i32, SDValue(BFM, 0));
2801 ReplaceNode(N, Inner.getNode());
2802 return true;
2803 }
2804
2805 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2806 CurDAG->getTargetConstant(Imms, dl, VT)};
2807 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2808 return true;
2809}
2810
2811/// Does DstMask form a complementary pair with the mask provided by
2812/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2813/// this asks whether DstMask zeroes precisely those bits that will be set by
2814/// the other half.
2815static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2816 unsigned NumberOfIgnoredHighBits, EVT VT) {
2817 assert((VT == MVT::i32 || VT == MVT::i64) &&
2818 "i32 or i64 mask type expected!");
2819 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2820
2821 // Enable implicitTrunc as we're intentionally ignoring high bits.
2822 APInt SignificantDstMask =
2823 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2824 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2825
2826 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2827 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2828}
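// For example, with no ignored high bits, DstMask == 0xffff0000 pairs with an
// inserted-bits mask of exactly 0x0000ffff: the two masks are disjoint and
// together cover every bit of the destination.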
2829
2830// Look for bits that will be useful for later uses.
2831 // A bit is considered useless as soon as it is dropped and never used
2832 // before it has been dropped.
2833 // E.g., looking for the useful bits of x:
2834 // 1. y = x & 0x7
2835 // 2. z = y >> 2
2836 // After #1, the useful bits of x are 0x7; those useful bits live through
2837 // y.
2838 // After #2, the useful bits of x are 0x4.
2839 // However, if x is used by an unpredictable instruction, then all its bits
2840// are useful.
2841// E.g.
2842// 1. y = x & 0x7
2843// 2. z = y >> 2
2844// 3. str x, [@x]
2845static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2846
2847 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2848 unsigned Depth) {
2849 uint64_t Imm =
2850 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2851 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2852 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2853 getUsefulBits(Op, UsefulBits, Depth + 1);
2854}
2855
2856 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2857 uint64_t Imm, uint64_t MSB,
2858 unsigned Depth) {
2859 // inherit the bitwidth value
2860 APInt OpUsefulBits(UsefulBits);
2861 OpUsefulBits = 1;
2862
2863 if (MSB >= Imm) {
2864 OpUsefulBits <<= MSB - Imm + 1;
2865 --OpUsefulBits;
2866 // The interesting part will be in the lower part of the result
2867 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2868 // The interesting part was starting at Imm in the argument
2869 OpUsefulBits <<= Imm;
2870 } else {
2871 OpUsefulBits <<= MSB + 1;
2872 --OpUsefulBits;
2873 // The interesting part will be shifted in the result
2874 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2875 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2876 // The interesting part was at zero in the argument
2877 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2878 }
2879
2880 UsefulBits &= OpUsefulBits;
2881}
2882
2883static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2884 unsigned Depth) {
2885 uint64_t Imm =
2886 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2887 uint64_t MSB =
2888 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2889
2890 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2891}
2892
2893 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2894 unsigned Depth) {
2895 uint64_t ShiftTypeAndValue =
2896 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2897 APInt Mask(UsefulBits);
2898 Mask.clearAllBits();
2899 Mask.flipAllBits();
2900
2901 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2902 // Shift Left
2903 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2904 Mask <<= ShiftAmt;
2905 getUsefulBits(Op, Mask, Depth + 1);
2906 Mask.lshrInPlace(ShiftAmt);
2907 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2908 // Shift Right
2909 // We do not handle AArch64_AM::ASR, because the sign will change the
2910 // number of useful bits
2911 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2912 Mask.lshrInPlace(ShiftAmt);
2913 getUsefulBits(Op, Mask, Depth + 1);
2914 Mask <<= ShiftAmt;
2915 } else
2916 return;
2917
2918 UsefulBits &= Mask;
2919}
2920
2921static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2922 unsigned Depth) {
2923 uint64_t Imm =
2924 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2925 uint64_t MSB =
2926 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2927
2928 APInt OpUsefulBits(UsefulBits);
2929 OpUsefulBits = 1;
2930
2931 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2932 ResultUsefulBits.flipAllBits();
2933 APInt Mask(UsefulBits.getBitWidth(), 0);
2934
2935 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2936
2937 if (MSB >= Imm) {
2938 // The instruction is a BFXIL.
2939 uint64_t Width = MSB - Imm + 1;
2940 uint64_t LSB = Imm;
2941
2942 OpUsefulBits <<= Width;
2943 --OpUsefulBits;
2944
2945 if (Op.getOperand(1) == Orig) {
2946 // Copy the low bits from the result to bits starting from LSB.
2947 Mask = ResultUsefulBits & OpUsefulBits;
2948 Mask <<= LSB;
2949 }
2950
2951 if (Op.getOperand(0) == Orig)
2952 // Bits starting from LSB in the input contribute to the result.
2953 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2954 } else {
2955 // The instruction is a BFI.
2956 uint64_t Width = MSB + 1;
2957 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2958
2959 OpUsefulBits <<= Width;
2960 --OpUsefulBits;
2961 OpUsefulBits <<= LSB;
2962
2963 if (Op.getOperand(1) == Orig) {
2964 // Copy the bits from the result to the zero bits.
2965 Mask = ResultUsefulBits & OpUsefulBits;
2966 Mask.lshrInPlace(LSB);
2967 }
2968
2969 if (Op.getOperand(0) == Orig)
2970 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2971 }
2972
2973 UsefulBits &= Mask;
2974}
2975
2976static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2977 SDValue Orig, unsigned Depth) {
2978
2979 // Users of this node should have already been instruction selected
2980 // FIXME: Can we turn that into an assert?
2981 if (!UserNode->isMachineOpcode())
2982 return;
2983
2984 switch (UserNode->getMachineOpcode()) {
2985 default:
2986 return;
2987 case AArch64::ANDSWri:
2988 case AArch64::ANDSXri:
2989 case AArch64::ANDWri:
2990 case AArch64::ANDXri:
2991 // We increment Depth only when we call the getUsefulBits
2992 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2993 Depth);
2994 case AArch64::UBFMWri:
2995 case AArch64::UBFMXri:
2996 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2997
2998 case AArch64::ORRWrs:
2999 case AArch64::ORRXrs:
3000 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3001 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3002 Depth);
3003 return;
3004 case AArch64::BFMWri:
3005 case AArch64::BFMXri:
3006 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3007
3008 case AArch64::STRBBui:
3009 case AArch64::STURBBi:
3010 if (UserNode->getOperand(0) != Orig)
3011 return;
3012 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3013 return;
3014
3015 case AArch64::STRHHui:
3016 case AArch64::STURHHi:
3017 if (UserNode->getOperand(0) != Orig)
3018 return;
3019 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3020 return;
3021 }
3022}
3023
3024static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3025 if (Depth >= SelectionDAG::MaxRecursionDepth)
3026 return;
3027 // Initialize UsefulBits
3028 if (!Depth) {
3029 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3030 // At the beginning, assume every produced bits is useful
3031 UsefulBits = APInt(Bitwidth, 0);
3032 UsefulBits.flipAllBits();
3033 }
3034 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3035
3036 for (SDNode *Node : Op.getNode()->users()) {
3037 // A use cannot produce useful bits
3038 APInt UsefulBitsForUse = APInt(UsefulBits);
3039 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3040 UsersUsefulBits |= UsefulBitsForUse;
3041 }
3042 // UsefulBits contains the produced bits that are meaningful for the
3043 // current definition, thus a user cannot make a bit meaningful at
3044 // this point
3045 UsefulBits &= UsersUsefulBits;
3046}
3047
3048/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3049/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3050/// 0, return Op unchanged.
3051static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3052 if (ShlAmount == 0)
3053 return Op;
3054
3055 EVT VT = Op.getValueType();
3056 SDLoc dl(Op);
3057 unsigned BitWidth = VT.getSizeInBits();
3058 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3059
3060 SDNode *ShiftNode;
3061 if (ShlAmount > 0) {
3062 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3063 ShiftNode = CurDAG->getMachineNode(
3064 UBFMOpc, dl, VT, Op,
3065 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3066 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3067 } else {
3068 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3069 assert(ShlAmount < 0 && "expected right shift");
3070 int ShrAmount = -ShlAmount;
3071 ShiftNode = CurDAG->getMachineNode(
3072 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3073 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3074 }
3075
3076 return SDValue(ShiftNode, 0);
3077}
3078
3079// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3080 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3081 bool BiggerPattern,
3082 const uint64_t NonZeroBits,
3083 SDValue &Src, int &DstLSB,
3084 int &Width);
3085
3086 // For bit-field-positioning pattern "(shl VAL, N)".
3087 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3088 bool BiggerPattern,
3089 const uint64_t NonZeroBits,
3090 SDValue &Src, int &DstLSB,
3091 int &Width);
3092
3093/// Does this tree qualify as an attempt to move a bitfield into position,
3094/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3095 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3096 bool BiggerPattern, SDValue &Src,
3097 int &DstLSB, int &Width) {
3098 EVT VT = Op.getValueType();
3099 unsigned BitWidth = VT.getSizeInBits();
3100 (void)BitWidth;
3101 assert(BitWidth == 32 || BitWidth == 64);
3102
3103 KnownBits Known = CurDAG->computeKnownBits(Op);
3104
3105 // Non-zero in the sense that they're not provably zero, which is the key
3106 // point if we want to use this value
3107 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3108 if (!isShiftedMask_64(NonZeroBits))
3109 return false;
3110
3111 switch (Op.getOpcode()) {
3112 default:
3113 break;
3114 case ISD::AND:
3115 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3116 NonZeroBits, Src, DstLSB, Width);
3117 case ISD::SHL:
3118 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3119 NonZeroBits, Src, DstLSB, Width);
3120 }
3121
3122 return false;
3123}
3124
3125 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3126 bool BiggerPattern,
3127 const uint64_t NonZeroBits,
3128 SDValue &Src, int &DstLSB,
3129 int &Width) {
3130 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3131
3132 EVT VT = Op.getValueType();
3133 assert((VT == MVT::i32 || VT == MVT::i64) &&
3134 "Caller guarantees VT is one of i32 or i64");
3135 (void)VT;
3136
3137 uint64_t AndImm;
3138 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3139 return false;
3140
3141 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3142 // 1) (AndImm & (1 << POS) == 0)
3143 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3144 //
3145 // 1) and 2) don't agree so something must be wrong (e.g., in
3146 // 'SelectionDAG::computeKnownBits')
3147 assert((~AndImm & NonZeroBits) == 0 &&
3148 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3149
3150 SDValue AndOp0 = Op.getOperand(0);
3151
3152 uint64_t ShlImm;
3153 SDValue ShlOp0;
3154 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3155 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3156 ShlOp0 = AndOp0.getOperand(0);
3157 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3158 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3159 ShlImm)) {
3160 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3161
3162 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3163 SDValue ShlVal = AndOp0.getOperand(0);
3164
3165 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3166 // expect VT to be MVT::i32.
3167 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3168
3169 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3170 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3171 } else
3172 return false;
3173
3174 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3175 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3176 // AndOp0+AND.
3177 if (!BiggerPattern && !AndOp0.hasOneUse())
3178 return false;
3179
3180 DstLSB = llvm::countr_zero(NonZeroBits);
3181 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3182
3183 // Bail out on large Width. This happens when no proper combining / constant
3184 // folding was performed.
3185 if (Width >= (int)VT.getSizeInBits()) {
3186 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3187 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3188 // "val".
3189 // If VT is i32, Width >= 32 means:
3190 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3191 // demands at least 'Width' bits (after the dag-combiner). Together with the
3192 // `any_extend` Op (undefined higher bits), this indicates a missed combine
3193 // when lowering the 'and' IR instruction to a machine IR instruction.
3194 LLVM_DEBUG(
3195 dbgs()
3196 << "Found large Width in bit-field-positioning -- this indicates no "
3197 "proper combining / constant folding was performed\n");
3198 return false;
3199 }
3200
3201 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3202 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3203 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3204 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3205 // which case it is not profitable to insert an extra shift.
3206 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3207 return false;
3208
3209 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3210 return true;
3211}
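// For example, (and (shl x, 3), 0x78) positions a 4-bit field of x at
// DstLSB == 3 with no extra shift, since the shift amount already matches the
// mask's trailing-zero count.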
3212
3213 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3214 // UBFIZ.
3215 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3216 SDValue &Src, int &DstLSB,
3217 int &Width) {
3218 // Caller should have verified that Op is a left shift with a constant shift
3219 // amount; the asserts below check that.
3220 assert(Op.getOpcode() == ISD::SHL &&
3221 "Op.getNode() should be a SHL node to call this function");
3222 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3223 "Op.getNode() should shift ShlImm to call this function");
3224
3225 uint64_t AndImm = 0;
3226 SDValue Op0 = Op.getOperand(0);
3227 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3228 return false;
3229
3230 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3231 if (isMask_64(ShiftedAndImm)) {
3232 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3233 // should end with Mask, and could be prefixed with random bits if those
3234 // bits are shifted out.
3235 //
3236 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3237 // the AND result corresponding to those bits is shifted out, so it's fine
3238 // to not extract them.
3239 Width = llvm::countr_one(ShiftedAndImm);
3240 DstLSB = ShlImm;
3241 Src = Op0.getOperand(0);
3242 return true;
3243 }
3244 return false;
3245}
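// A worked example (immediates are hypothetical): for "shl (and val, 0xFF), 8",
// ShiftedAndImm == ((0xFF << 8) >> 8) == 0xFF, which is a mask, so the node is
// equivalent to "ubfiz w0, w1, #8, #8" (DstLSB == 8, Width == 8) with w1
// holding val; any AND bits that would be shifted out are ignored, as noted above.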
3246
3247static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3248 bool BiggerPattern,
3249 const uint64_t NonZeroBits,
3250 SDValue &Src, int &DstLSB,
3251 int &Width) {
3252 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3253
3254 EVT VT = Op.getValueType();
3255 assert((VT == MVT::i32 || VT == MVT::i64) &&
3256 "Caller guarantees that type is i32 or i64");
3257 (void)VT;
3258
3259 uint64_t ShlImm;
3260 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3261 return false;
3262
3263 if (!BiggerPattern && !Op.hasOneUse())
3264 return false;
3265
3266 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3267 return true;
3268
3269 DstLSB = llvm::countr_zero(NonZeroBits);
3270 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3271
3272 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3273 return false;
3274
3275 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3276 return true;
3277}
3278
3279static bool isShiftedMask(uint64_t Mask, EVT VT) {
3280 assert(VT == MVT::i32 || VT == MVT::i64);
3281 if (VT == MVT::i32)
3282 return isShiftedMask_32(Mask);
3283 return isShiftedMask_64(Mask);
3284}
3285
3286// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3287// inserted only sets known zero bits.
3288static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3289 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3290
3291 EVT VT = N->getValueType(0);
3292 if (VT != MVT::i32 && VT != MVT::i64)
3293 return false;
3294
3295 unsigned BitWidth = VT.getSizeInBits();
3296
3297 uint64_t OrImm;
3298 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3299 return false;
3300
3301 // Skip this transformation if the OR immediate can be encoded directly in an
3302 // ORR instruction. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which
3303 // is most likely performance neutral.
3304 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3305 return false;
3306
3307 uint64_t MaskImm;
3308 SDValue And = N->getOperand(0);
3309 // Must be a single use AND with an immediate operand.
3310 if (!And.hasOneUse() ||
3311 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3312 return false;
3313
3314 // Compute the Known Zero for the AND as this allows us to catch more general
3315 // cases than just looking for AND with imm.
3316 KnownBits Known = CurDAG->computeKnownBits(And);
3317
3318 // Non-zero in the sense that they're not provably zero, which is the key
3319 // point if we want to use this value.
3320 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3321
3322 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3323 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3324 return false;
3325
3326 // The bits being inserted must only set those bits that are known to be zero.
3327 if ((OrImm & NotKnownZero) != 0) {
3328 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3329 // currently handle this case.
3330 return false;
3331 }
3332
3333 // BFI/BFXIL dst, src, #lsb, #width.
3334 int LSB = llvm::countr_one(NotKnownZero);
3335 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3336
3337 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3338 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3339 unsigned ImmS = Width - 1;
3340
3341 // If we're creating a BFI instruction, avoid cases where we need more
3342 // instructions to materialize the BFI constant as compared to the original
3343 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3344 // should be no worse in this case.
3345 bool IsBFI = LSB != 0;
3346 uint64_t BFIImm = OrImm >> LSB;
3347 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3348 // We have a BFI instruction and we know the constant can't be materialized
3349 // with an ORR-immediate using the zero register.
3350 unsigned OrChunks = 0, BFIChunks = 0;
3351 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3352 if (((OrImm >> Shift) & 0xFFFF) != 0)
3353 ++OrChunks;
3354 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3355 ++BFIChunks;
3356 }
3357 if (BFIChunks > OrChunks)
3358 return false;
3359 }
3360
3361 // Materialize the constant to be inserted.
3362 SDLoc DL(N);
3363 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3364 SDNode *MOVI = CurDAG->getMachineNode(
3365 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3366
3367 // Create the BFI/BFXIL instruction.
3368 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3369 CurDAG->getTargetConstant(ImmR, DL, VT),
3370 CurDAG->getTargetConstant(ImmS, DL, VT)};
3371 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3372 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3373 return true;
3374}
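// A worked i32 example (constants are hypothetical): for
// "or (and X, 0xFFFFFF00), 0x55", Known.Zero is 0xFF and NotKnownZero is
// 0xFFFFFF00, so LSB == 0 and Width == 8 (ImmR == 0, ImmS == 7), and the node
// is selected roughly as
//   mov   w8, #0x55
//   bfxil w0, w8, #0, #8
// with w0 holding X, i.e. the upper 24 bits of X are kept and 0x55 is
// inserted below them.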
3375
3376static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3377 SDValue &ShiftedOperand,
3378 uint64_t &EncodedShiftImm) {
3379 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3380 if (!Dst.hasOneUse())
3381 return false;
3382
3383 EVT VT = Dst.getValueType();
3384 assert((VT == MVT::i32 || VT == MVT::i64) &&
3385 "Caller should guarantee that VT is one of i32 or i64");
3386 const unsigned SizeInBits = VT.getSizeInBits();
3387
3388 SDLoc DL(Dst.getNode());
3389 uint64_t AndImm, ShlImm;
3390 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3391 isShiftedMask_64(AndImm)) {
3392 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3393 SDValue DstOp0 = Dst.getOperand(0);
3394 if (!DstOp0.hasOneUse())
3395 return false;
3396
3397 // An example to illustrate the transformation
3398 // From:
3399 // lsr x8, x1, #1
3400 // and x8, x8, #0x3f80
3401 // bfxil x8, x1, #0, #7
3402 // To:
3403 // and x8, x1, #0x7f
3404 // ubfx x9, x1, #8, #7
3405 // orr x8, x8, x9, lsl #7
3406 //
3407 // The number of instructions remains the same, but ORR is faster than BFXIL
3408 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3409 // the dependency chain is improved after the transformation.
3410 uint64_t SrlImm;
3411 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3412 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3413 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3414 unsigned MaskWidth =
3415 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3416 unsigned UBFMOpc =
3417 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3418 SDNode *UBFMNode = CurDAG->getMachineNode(
3419 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3420 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3421 VT),
3422 CurDAG->getTargetConstant(
3423 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3424 ShiftedOperand = SDValue(UBFMNode, 0);
3425 EncodedShiftImm = AArch64_AM::getShifterImm(
3426 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3427 return true;
3428 }
3429 }
3430 return false;
3431 }
3432
3433 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3434 ShiftedOperand = Dst.getOperand(0);
3435 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3436 return true;
3437 }
3438
3439 uint64_t SrlImm;
3440 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3441 ShiftedOperand = Dst.getOperand(0);
3442 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3443 return true;
3444 }
3445 return false;
3446}
3447
3448// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3449// the operands and select it to AArch64::ORR with shifted registers if
3450// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3451static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3452 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3453 const bool BiggerPattern) {
3454 EVT VT = N->getValueType(0);
3455 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3456 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3457 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3458 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3459 assert((VT == MVT::i32 || VT == MVT::i64) &&
3460 "Expect result type to be i32 or i64 since N is combinable to BFM");
3461 SDLoc DL(N);
3462
3463 // Bail out if BFM simplifies away one node in BFM Dst.
3464 if (OrOpd1 != Dst)
3465 return false;
3466
3467 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3468 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3469 // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
3470 if (BiggerPattern) {
3471 uint64_t SrcAndImm;
3472 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3473 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3474 // OrOpd0 = AND Src, #Mask
3475 // So BFM simplifies away one AND node from Src and doesn't simplify away
3476 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3477 // one node (from Rd), ORR is better since it has higher throughput and
3478 // smaller latency than BFM on many AArch64 processors (and for the rest
3479 // ORR is at least as good as BFM).
3480 SDValue ShiftedOperand;
3481 uint64_t EncodedShiftImm;
3482 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3483 EncodedShiftImm)) {
3484 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3485 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3486 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3487 return true;
3488 }
3489 }
3490 return false;
3491 }
3492
3493 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3494
3495 uint64_t ShlImm;
3496 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3497 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3498 SDValue Ops[] = {
3499 Dst, Src,
3500 CurDAG->getTargetConstant(
3501 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3502 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3503 return true;
3504 }
3505
3506 // Select the following pattern to left-shifted operand rather than BFI.
3507 // %val1 = op ..
3508 // %val2 = shl %val1, #imm
3509 // %res = or %val1, %val2
3510 //
3511 // If N is selected to be BFI, we know that
3512 // 1) OrOpd0 would be the operand from which bits are extracted (folded into
3513 // the BFI), and 2) OrOpd1 would be the destination operand (preserved).
3514 //
3515 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3516 if (OrOpd0.getOperand(0) == OrOpd1) {
3517 SDValue Ops[] = {
3518 OrOpd1, OrOpd1,
3519 CurDAG->getTargetConstant(
3520 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3521 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3522 return true;
3523 }
3524 }
3525
3526 uint64_t SrlImm;
3527 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3528 // Select the following pattern to right-shifted operand rather than BFXIL.
3529 // %val1 = op ..
3530 // %val2 = lshr %val1, #imm
3531 // %res = or %val1, %val2
3532 //
3533 // If N is selected to be BFXIL, we know that
3534 // 1) OrOpd0 would be the operand from which bits are extracted (folded into
3535 // the BFXIL), and 2) OrOpd1 would be the destination operand (preserved).
3536 //
3537 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3538 if (OrOpd0.getOperand(0) == OrOpd1) {
3539 SDValue Ops[] = {
3540 OrOpd1, OrOpd1,
3541 CurDAG->getTargetConstant(
3542 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3543 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3544 return true;
3545 }
3546 }
3547
3548 return false;
3549}
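// A small example of the !BiggerPattern SHL case above (names and registers
// are hypothetical):
//   %val2 = shl %val1, 3
//   %res  = or  %val1, %val2
// is selected as "orr x0, x1, x1, lsl #3" with x1 holding %val1, instead of a
// BFI that would have to read-modify-write its destination register.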
3550
3551static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3552 SelectionDAG *CurDAG) {
3553 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3554
3555 EVT VT = N->getValueType(0);
3556 if (VT != MVT::i32 && VT != MVT::i64)
3557 return false;
3558
3559 unsigned BitWidth = VT.getSizeInBits();
3560
3561 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3562 // have the expected shape. Try to undo that.
3563
3564 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3565 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3566
3567 // Given an OR operation, check if we have the following pattern
3568 // ubfm c, b, imm, imm2 (or something that does the same job, see
3569 // isBitfieldExtractOp)
3570 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3571 // countTrailingZeros(mask2) == imm2 - imm + 1
3572 // f = d | c
3573 // if yes, replace the OR instruction with:
3574 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3575
3576 // OR is commutative, check all combinations of operand order and values of
3577 // BiggerPattern, i.e.
3578 // Opd0, Opd1, BiggerPattern=false
3579 // Opd1, Opd0, BiggerPattern=false
3580 // Opd0, Opd1, BiggerPattern=true
3581 // Opd1, Opd0, BiggerPattern=true
3582 // Several of these combinations may match, so check with BiggerPattern=false
3583 // first since that will produce better results by matching more instructions
3584 // and/or inserting fewer extra instructions.
3585 for (int I = 0; I < 4; ++I) {
3586
3587 SDValue Dst, Src;
3588 unsigned ImmR, ImmS;
3589 bool BiggerPattern = I / 2;
3590 SDValue OrOpd0Val = N->getOperand(I % 2);
3591 SDNode *OrOpd0 = OrOpd0Val.getNode();
3592 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3593 SDNode *OrOpd1 = OrOpd1Val.getNode();
3594
3595 unsigned BFXOpc;
3596 int DstLSB, Width;
3597 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3598 NumberOfIgnoredLowBits, BiggerPattern)) {
3599 // Check that the returned opcode is compatible with the pattern,
3600 // i.e., same type and zero extended (U and not S)
3601 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3602 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3603 continue;
3604
3605 // Compute the width of the bitfield insertion
3606 DstLSB = 0;
3607 Width = ImmS - ImmR + 1;
3608 // FIXME: This constraint is to catch bitfield insertion; we may
3609 // want to widen the pattern if we want to grab the general
3610 // bitfield move case.
3611 if (Width <= 0)
3612 continue;
3613
3614 // If the mask on the insertee is correct, we have a BFXIL operation. We
3615 // can share the ImmR and ImmS values from the already-computed UBFM.
3616 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3617 BiggerPattern,
3618 Src, DstLSB, Width)) {
3619 ImmR = (BitWidth - DstLSB) % BitWidth;
3620 ImmS = Width - 1;
3621 } else
3622 continue;
3623
3624 // Check the second part of the pattern
3625 EVT VT = OrOpd1Val.getValueType();
3626 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3627
3628 // Compute the Known Zero for the candidate of the first operand.
3629 // This allows us to catch more general cases than just looking for
3630 // AND with imm. Indeed, simplify-demanded-bits may have removed
3631 // the AND instruction because it proves it was useless.
3632 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3633
3634 // Check if there is enough room for the second operand to appear
3635 // in the first one
3636 APInt BitsToBeInserted =
3637 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3638
3639 if ((BitsToBeInserted & ~Known.Zero) != 0)
3640 continue;
3641
3642 // Set the first operand
3643 uint64_t Imm;
3644 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3645 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3646 // In that case, we can eliminate the AND
3647 Dst = OrOpd1->getOperand(0);
3648 else
3649 // Maybe the AND has been removed by simplify-demanded-bits
3650 // or is useful because it discards more bits
3651 Dst = OrOpd1Val;
3652
3653 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3654 // with shifted operand is more efficient.
3655 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3656 BiggerPattern))
3657 return true;
3658
3659 // both parts match
3660 SDLoc DL(N);
3661 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3662 CurDAG->getTargetConstant(ImmS, DL, VT)};
3663 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3664 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3665 return true;
3666 }
3667
3668 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3669 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3670 // mask (e.g., 0x000ffff0).
3671 uint64_t Mask0Imm, Mask1Imm;
3672 SDValue And0 = N->getOperand(0);
3673 SDValue And1 = N->getOperand(1);
3674 if (And0.hasOneUse() && And1.hasOneUse() &&
3675 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3676 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3677 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3678 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3679
3680 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3681 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3682 // bits to be inserted.
3683 if (isShiftedMask(Mask0Imm, VT)) {
3684 std::swap(And0, And1);
3685 std::swap(Mask0Imm, Mask1Imm);
3686 }
3687
3688 SDValue Src = And1->getOperand(0);
3689 SDValue Dst = And0->getOperand(0);
3690 unsigned LSB = llvm::countr_zero(Mask1Imm);
3691 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3692
3693 // The BFXIL inserts the low-order bits from a source register, so right
3694 // shift the needed bits into place.
3695 SDLoc DL(N);
3696 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3697 uint64_t LsrImm = LSB;
3698 if (Src->hasOneUse() &&
3699 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3700 (LsrImm + LSB) < BitWidth) {
3701 Src = Src->getOperand(0);
3702 LsrImm += LSB;
3703 }
3704
3705 SDNode *LSR = CurDAG->getMachineNode(
3706 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3707 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3708
3709 // BFXIL is an alias of BFM, so translate to BFM operands.
3710 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3711 unsigned ImmS = Width - 1;
3712
3713 // Create the BFXIL instruction.
3714 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3715 CurDAG->getTargetConstant(ImmR, DL, VT),
3716 CurDAG->getTargetConstant(ImmS, DL, VT)};
3717 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3718 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3719 return true;
3720 }
3721
3722 return false;
3723}
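// A worked example of the two-AND case above (masks and registers are
// hypothetical): for the i32 node "or (and X, 0xFF0000FF), (and Y, 0x00FFFF00)",
// Mask1Imm == 0x00FFFF00 is the shifted mask, so LSB == 8 and Width == 16 and
// the node is selected roughly as
//   lsr w8, w1, #8
//   bfi w0, w8, #8, #16
// with w0 holding X and w1 holding Y, i.e. bits [23:8] of Y replace bits
// [23:8] of X.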
3724
3725bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3726 if (N->getOpcode() != ISD::OR)
3727 return false;
3728
3729 APInt NUsefulBits;
3730 getUsefulBits(SDValue(N, 0), NUsefulBits);
3731
3732 // If none of the bits are useful, just return UNDEF.
3733 if (!NUsefulBits) {
3734 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3735 return true;
3736 }
3737
3738 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3739 return true;
3740
3741 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3742}
3743
3744/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3745/// equivalent of a left shift by a constant amount followed by an and masking
3746/// out a contiguous set of bits.
3747bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3748 if (N->getOpcode() != ISD::AND)
3749 return false;
3750
3751 EVT VT = N->getValueType(0);
3752 if (VT != MVT::i32 && VT != MVT::i64)
3753 return false;
3754
3755 SDValue Op0;
3756 int DstLSB, Width;
3757 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3758 Op0, DstLSB, Width))
3759 return false;
3760
3761 // ImmR is the rotate right amount.
3762 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3763 // ImmS is the most significant bit of the source to be moved.
3764 unsigned ImmS = Width - 1;
3765
3766 SDLoc DL(N);
3767 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3768 CurDAG->getTargetConstant(ImmS, DL, VT)};
3769 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3770 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3771 return true;
3772}
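// A worked example (constants are hypothetical): for the i32 node
// "and (shl val, 3), 0xF8", DstLSB == 3 and Width == 5, giving ImmR == 29 and
// ImmS == 4, i.e. the UBFM alias "ubfiz w0, w1, #3, #5" with w1 holding val.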
3773
3774/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3775/// variable shift/rotate instructions.
3776bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3777 EVT VT = N->getValueType(0);
3778
3779 unsigned Opc;
3780 switch (N->getOpcode()) {
3781 case ISD::ROTR:
3782 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3783 break;
3784 case ISD::SHL:
3785 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3786 break;
3787 case ISD::SRL:
3788 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3789 break;
3790 case ISD::SRA:
3791 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3792 break;
3793 default:
3794 return false;
3795 }
3796
3797 uint64_t Size;
3798 uint64_t Bits;
3799 if (VT == MVT::i32) {
3800 Bits = 5;
3801 Size = 32;
3802 } else if (VT == MVT::i64) {
3803 Bits = 6;
3804 Size = 64;
3805 } else
3806 return false;
3807
3808 SDValue ShiftAmt = N->getOperand(1);
3809 SDLoc DL(N);
3810 SDValue NewShiftAmt;
3811
3812 // Skip over an extend of the shift amount.
3813 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3814 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3815 ShiftAmt = ShiftAmt->getOperand(0);
3816
3817 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3818 SDValue Add0 = ShiftAmt->getOperand(0);
3819 SDValue Add1 = ShiftAmt->getOperand(1);
3820 uint64_t Add0Imm;
3821 uint64_t Add1Imm;
3822 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3823 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3824 // to avoid the ADD/SUB.
3825 NewShiftAmt = Add0;
3826 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3827 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3828 (Add0Imm % Size == 0)) {
3829 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3830 // to generate a NEG instead of a SUB from a constant.
3831 unsigned NegOpc;
3832 unsigned ZeroReg;
3833 EVT SubVT = ShiftAmt->getValueType(0);
3834 if (SubVT == MVT::i32) {
3835 NegOpc = AArch64::SUBWrr;
3836 ZeroReg = AArch64::WZR;
3837 } else {
3838 assert(SubVT == MVT::i64);
3839 NegOpc = AArch64::SUBXrr;
3840 ZeroReg = AArch64::XZR;
3841 }
3842 SDValue Zero =
3843 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3844 MachineSDNode *Neg =
3845 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3846 NewShiftAmt = SDValue(Neg, 0);
3847 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3848 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3849 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3850 // to generate a NOT instead of a SUB from a constant.
3851 unsigned NotOpc;
3852 unsigned ZeroReg;
3853 EVT SubVT = ShiftAmt->getValueType(0);
3854 if (SubVT == MVT::i32) {
3855 NotOpc = AArch64::ORNWrr;
3856 ZeroReg = AArch64::WZR;
3857 } else {
3858 assert(SubVT == MVT::i64);
3859 NotOpc = AArch64::ORNXrr;
3860 ZeroReg = AArch64::XZR;
3861 }
3862 SDValue Zero =
3863 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3864 MachineSDNode *Not =
3865 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3866 NewShiftAmt = SDValue(Not, 0);
3867 } else
3868 return false;
3869 } else {
3870 // If the shift amount is masked with an AND, check that the mask covers the
3871 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3872 // the AND.
3873 uint64_t MaskImm;
3874 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3875 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3876 return false;
3877
3878 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3879 return false;
3880
3881 NewShiftAmt = ShiftAmt->getOperand(0);
3882 }
3883
3884 // Narrow/widen the shift amount to match the size of the shift operation.
3885 if (VT == MVT::i32)
3886 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3887 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3888 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3889 MachineSDNode *Ext = CurDAG->getMachineNode(
3890 AArch64::SUBREG_TO_REG, DL, VT,
3891 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3892 NewShiftAmt = SDValue(Ext, 0);
3893 }
3894
3895 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3896 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3897 return true;
3898}
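// Two small examples of the shift-amount rewrite above (registers are
// hypothetical, with x1 holding the shifted value and x2 the variable amount):
//   (shl X, (add Amt, 64))  ->  lslv x0, x1, x2            ; the ADD is dropped
//   (shl X, (sub 64, Amt))  ->  neg x3, x2 ; lslv x0, x1, x3
// Both rely on LSLV/LSRV/ASRV/RORV using only the low 6 bits (64-bit) or
// 5 bits (32-bit) of the shift-amount register.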
3899
3900static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3901 SDValue &FixedPos,
3902 unsigned RegWidth,
3903 bool isReciprocal) {
3904 APFloat FVal(0.0);
3905 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3906 FVal = CN->getValueAPF();
3907 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3908 // Some otherwise illegal constants are allowed in this case.
3909 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3910 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3911 return false;
3912
3913 ConstantPoolSDNode *CN =
3914 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3915 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3916 } else
3917 return false;
3918
3919 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3920 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3921 // x-register.
3922 //
3923 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3924 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3925 // integers.
3926 bool IsExact;
3927
3928 if (isReciprocal)
3929 if (!FVal.getExactInverse(&FVal))
3930 return false;
3931
3932 // fbits is between 1 and 64 in the worst-case, which means the fmul
3933 // could have 2^64 as an actual operand. Need 65 bits of precision.
3934 APSInt IntVal(65, true);
3935 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3936
3937 // N.b. isPowerOf2 also checks for > 0.
3938 if (!IsExact || !IntVal.isPowerOf2())
3939 return false;
3940 unsigned FBits = IntVal.logBase2();
3941
3942 // Checks above should have guaranteed that we haven't lost information in
3943 // finding FBits, but it must still be in range.
3944 if (FBits == 0 || FBits > RegWidth) return false;
3945
3946 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3947 return true;
3948}
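// A worked example (values are hypothetical): for
// "fp_to_sint (fmul x, 16.0)" with RegWidth == 32, FVal == 16.0 converts
// exactly to the integer 16, so FBits == log2(16) == 4 and the combined node
// can be selected as "fcvtzs w0, s0, #4".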
3949
3950bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3951 unsigned RegWidth) {
3952 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3953 false);
3954}
3955
3956bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3957 SDValue &FixedPos,
3958 unsigned RegWidth) {
3959 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3960 true);
3961}
3962
3963// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3964// fields, converts them to integers, and combines them into a single value
3965// to be used in the MRS/MSR instruction.
3966static int getIntOperandFromRegisterString(StringRef RegString) {
3967 SmallVector<StringRef, 5> Fields;
3968 RegString.split(Fields, ':');
3969
3970 if (Fields.size() == 1)
3971 return -1;
3972
3973 assert(Fields.size() == 5
3974 && "Invalid number of fields in read register string");
3975
3976 SmallVector<int, 5> Ops;
3977 bool AllIntFields = true;
3978
3979 for (StringRef Field : Fields) {
3980 unsigned IntField;
3981 AllIntFields &= !Field.getAsInteger(10, IntField);
3982 Ops.push_back(IntField);
3983 }
3984
3985 assert(AllIntFields &&
3986 "Unexpected non-integer value in special register string.");
3987 (void)AllIntFields;
3988
3989 // Need to combine the integer fields of the string into a single value
3990 // based on the bit encoding of the MRS/MSR instruction.
3991 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3992 (Ops[3] << 3) | (Ops[4]);
3993}
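// A worked example of the packing above (the register string is hypothetical):
// "3:3:13:0:2" yields (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 ==
// 0xDE82, which becomes the immediate operand of the MRS/MSR machine node
// created below.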
3994
3995// Lower the read_register intrinsic to an MRS instruction node if the special
3996// register string argument is either of the form detailed in the ACLE (the
3997// form described in getIntOperandFromRegisterString) or is a named register
3998// known by the MRS SysReg mapper.
3999bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4000 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4001 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4002 SDLoc DL(N);
4003
4004 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4005
4006 unsigned Opcode64Bit = AArch64::MRS;
4007 int Imm = getIntOperandFromRegisterString(RegString->getString());
4008 if (Imm == -1) {
4009 // No match; use the sysreg mapper to map the remaining possible strings to
4010 // the value for the register to be used for the instruction operand.
4011 const auto *TheReg =
4012 AArch64SysReg::lookupSysRegByName(RegString->getString());
4013 if (TheReg && TheReg->Readable &&
4014 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4015 Imm = TheReg->Encoding;
4016 else
4017 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4018
4019 if (Imm == -1) {
4020 // Still no match, see if this is "pc" or give up.
4021 if (!ReadIs128Bit && RegString->getString() == "pc") {
4022 Opcode64Bit = AArch64::ADR;
4023 Imm = 0;
4024 } else {
4025 return false;
4026 }
4027 }
4028 }
4029
4030 SDValue InChain = N->getOperand(0);
4031 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4032 if (!ReadIs128Bit) {
4033 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4034 {SysRegImm, InChain});
4035 } else {
4036 SDNode *MRRS = CurDAG->getMachineNode(
4037 AArch64::MRRS, DL,
4038 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4039 {SysRegImm, InChain});
4040
4041 // Sysregs are not endian. The even register always contains the low half
4042 // of the register.
4043 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4044 SDValue(MRRS, 0));
4045 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4046 SDValue(MRRS, 0));
4047 SDValue OutChain = SDValue(MRRS, 1);
4048
4049 ReplaceUses(SDValue(N, 0), Lo);
4050 ReplaceUses(SDValue(N, 1), Hi);
4051 ReplaceUses(SDValue(N, 2), OutChain);
4052 }
4053 return true;
4054}
4055
4056// Lower the write_register intrinsic to an MSR instruction node if the special
4057// register string argument is either of the form detailed in the ACLE (the
4058// form described in getIntOperandFromRegisterString) or is a named register
4059// known by the MSR SysReg mapper.
4060bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4061 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4062 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4063 SDLoc DL(N);
4064
4065 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4066
4067 if (!WriteIs128Bit) {
4068 // Check if the register was one of those allowed as the pstatefield value
4069 // in the MSR (immediate) instruction. To accept the values allowed in the
4070 // pstatefield for the MSR (immediate) instruction, we also require that an
4071 // immediate value has been provided as an argument; we know that this is
4072 // the case as it has been ensured by semantic checking.
4073 auto trySelectPState = [&](auto PMapper, unsigned State) {
4074 if (PMapper) {
4075 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4076 "Expected a constant integer expression.");
4077 unsigned Reg = PMapper->Encoding;
4078 uint64_t Immed = N->getConstantOperandVal(2);
4079 CurDAG->SelectNodeTo(
4080 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4081 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4082 return true;
4083 }
4084 return false;
4085 };
4086
4087 if (trySelectPState(
4088 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4089 AArch64::MSRpstateImm4))
4090 return true;
4091 if (trySelectPState(
4092 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4093 AArch64::MSRpstateImm1))
4094 return true;
4095 }
4096
4097 int Imm = getIntOperandFromRegisterString(RegString->getString());
4098 if (Imm == -1) {
4099 // Use the sysreg mapper to attempt to map the remaining possible strings
4100 // to the value for the register to be used for the MSR (register)
4101 // instruction operand.
4102 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4103 if (TheReg && TheReg->Writeable &&
4104 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4105 Imm = TheReg->Encoding;
4106 else
4107 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4108
4109 if (Imm == -1)
4110 return false;
4111 }
4112
4113 SDValue InChain = N->getOperand(0);
4114 if (!WriteIs128Bit) {
4115 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4116 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4117 N->getOperand(2), InChain);
4118 } else {
4119 // No endian swap. The lower half always goes into the even subreg, and the
4120 // higher half always into the odd subreg.
4121 SDNode *Pair = CurDAG->getMachineNode(
4122 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4123 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4124 MVT::i32),
4125 N->getOperand(2),
4126 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4127 N->getOperand(3),
4128 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4129
4130 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4131 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4132 SDValue(Pair, 0), InChain);
4133 }
4134
4135 return true;
4136}
4137
4138/// We've got special pseudo-instructions for these
4139bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4140 unsigned Opcode;
4141 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4142
4143 // Leave IR for LSE if subtarget supports it.
4144 if (Subtarget->hasLSE()) return false;
4145
4146 if (MemTy == MVT::i8)
4147 Opcode = AArch64::CMP_SWAP_8;
4148 else if (MemTy == MVT::i16)
4149 Opcode = AArch64::CMP_SWAP_16;
4150 else if (MemTy == MVT::i32)
4151 Opcode = AArch64::CMP_SWAP_32;
4152 else if (MemTy == MVT::i64)
4153 Opcode = AArch64::CMP_SWAP_64;
4154 else
4155 llvm_unreachable("Unknown AtomicCmpSwap type");
4156
4157 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4158 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4159 N->getOperand(0)};
4160 SDNode *CmpSwap = CurDAG->getMachineNode(
4161 Opcode, SDLoc(N),
4162 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4163
4164 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4165 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4166
4167 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4168 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4169 CurDAG->RemoveDeadNode(N);
4170
4171 return true;
4172}
4173
4174bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4175 SDValue &Shift) {
4176 if (!isa<ConstantSDNode>(N))
4177 return false;
4178
4179 SDLoc DL(N);
4180 uint64_t Val = cast<ConstantSDNode>(N)
4181 ->getAPIntValue()
4182 .trunc(VT.getFixedSizeInBits())
4183 .getZExtValue();
4184
4185 switch (VT.SimpleTy) {
4186 case MVT::i8:
4187 // All immediates are supported.
4188 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4189 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4190 return true;
4191 case MVT::i16:
4192 case MVT::i32:
4193 case MVT::i64:
4194 // Support 8bit unsigned immediates.
4195 if (Val <= 255) {
4196 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4197 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4198 return true;
4199 }
4200 // Support 16bit unsigned immediates that are a multiple of 256.
4201 if (Val <= 65280 && Val % 256 == 0) {
4202 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4203 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4204 return true;
4205 }
4206 break;
4207 default:
4208 break;
4209 }
4210
4211 return false;
4212}
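// A worked example (immediates are hypothetical): for an i16/i32/i64 element
// type, the value 0x3400 (13312) is accepted with Imm == 0x34 and Shift == 8,
// whereas 0x3401 is rejected because it is neither an 8-bit immediate nor a
// multiple of 256.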
4213
4214bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4215 SDValue &Imm, SDValue &Shift,
4216 bool Negate) {
4217 if (!isa<ConstantSDNode>(N))
4218 return false;
4219
4220 SDLoc DL(N);
4221 int64_t Val = cast<ConstantSDNode>(N)
4222 ->getAPIntValue()
4223 .trunc(VT.getFixedSizeInBits())
4224 .getSExtValue();
4225
4226 if (Negate)
4227 Val = -Val;
4228
4229 // Signed saturating instructions treat their immediate operand as unsigned,
4230 // whereas the related intrinsics define their operands to be signed. This
4231 // means we can only use the immediate form when the operand is non-negative.
4232 if (Val < 0)
4233 return false;
4234
4235 switch (VT.SimpleTy) {
4236 case MVT::i8:
4237 // All positive immediates are supported.
4238 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4239 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4240 return true;
4241 case MVT::i16:
4242 case MVT::i32:
4243 case MVT::i64:
4244 // Support 8bit positive immediates.
4245 if (Val <= 255) {
4246 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4247 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4248 return true;
4249 }
4250 // Support 16bit positive immediates that are a multiple of 256.
4251 if (Val <= 65280 && Val % 256 == 0) {
4252 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4253 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4254 return true;
4255 }
4256 break;
4257 default:
4258 break;
4259 }
4260
4261 return false;
4262}
4263
4264bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4265 SDValue &Shift) {
4266 if (!isa<ConstantSDNode>(N))
4267 return false;
4268
4269 SDLoc DL(N);
4270 int64_t Val = cast<ConstantSDNode>(N)
4271 ->getAPIntValue()
4272 .trunc(VT.getFixedSizeInBits())
4273 .getSExtValue();
4274
4275 switch (VT.SimpleTy) {
4276 case MVT::i8:
4277 // All immediates are supported.
4278 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4279 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4280 return true;
4281 case MVT::i16:
4282 case MVT::i32:
4283 case MVT::i64:
4284 // Support 8bit signed immediates.
4285 if (Val >= -128 && Val <= 127) {
4286 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4287 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4288 return true;
4289 }
4290 // Support 16bit signed immediates that are a multiple of 256.
4291 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4292 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4293 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4294 return true;
4295 }
4296 break;
4297 default:
4298 break;
4299 }
4300
4301 return false;
4302}
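// A worked example (immediates are hypothetical): for an i16 element type the
// value -3328 (0xF300) is accepted with Shift == 8 and Imm == 0xF3, since
// -3328 is a multiple of 256 within [-32768, 32512]; -3329 is rejected because
// it satisfies neither the 8-bit nor the shifted 16-bit form.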
4303
4304bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4305 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4306 int64_t ImmVal = CNode->getSExtValue();
4307 SDLoc DL(N);
4308 if (ImmVal >= -128 && ImmVal < 128) {
4309 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4310 return true;
4311 }
4312 }
4313 return false;
4314}
4315
4316bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4317 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4318 uint64_t ImmVal = CNode->getZExtValue();
4319
4320 switch (VT.SimpleTy) {
4321 case MVT::i8:
4322 ImmVal &= 0xFF;
4323 break;
4324 case MVT::i16:
4325 ImmVal &= 0xFFFF;
4326 break;
4327 case MVT::i32:
4328 ImmVal &= 0xFFFFFFFF;
4329 break;
4330 case MVT::i64:
4331 break;
4332 default:
4333 llvm_unreachable("Unexpected type");
4334 }
4335
4336 if (ImmVal < 256) {
4337 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4338 return true;
4339 }
4340 }
4341 return false;
4342}
4343
4344bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4345 bool Invert) {
4346 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4347 uint64_t ImmVal = CNode->getZExtValue();
4348 SDLoc DL(N);
4349
4350 if (Invert)
4351 ImmVal = ~ImmVal;
4352
4353 // Shift mask depending on type size.
4354 switch (VT.SimpleTy) {
4355 case MVT::i8:
4356 ImmVal &= 0xFF;
4357 ImmVal |= ImmVal << 8;
4358 ImmVal |= ImmVal << 16;
4359 ImmVal |= ImmVal << 32;
4360 break;
4361 case MVT::i16:
4362 ImmVal &= 0xFFFF;
4363 ImmVal |= ImmVal << 16;
4364 ImmVal |= ImmVal << 32;
4365 break;
4366 case MVT::i32:
4367 ImmVal &= 0xFFFFFFFF;
4368 ImmVal |= ImmVal << 32;
4369 break;
4370 case MVT::i64:
4371 break;
4372 default:
4373 llvm_unreachable("Unexpected type");
4374 }
4375
4376 uint64_t encoding;
4377 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4378 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4379 return true;
4380 }
4381 }
4382 return false;
4383}
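// A worked example (immediates are hypothetical): for an i16 element type the
// value 0x00FF is replicated to 0x00FF00FF00FF00FF, which is a valid logical
// immediate, so its encoding is returned; 0x1234 is rejected because its
// replication 0x1234123412341234 cannot be encoded as a logical immediate.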
4384
4385// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4386// Rather than attempt to normalise everything we can sometimes saturate the
4387// shift amount during selection. This function also allows for consistent
4388// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4389// required by the instructions.
4390bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4391 uint64_t High, bool AllowSaturation,
4392 SDValue &Imm) {
4393 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4394 uint64_t ImmVal = CN->getZExtValue();
4395
4396 // Reject shift amounts that are too small.
4397 if (ImmVal < Low)
4398 return false;
4399
4400 // Reject or saturate shift amounts that are too big.
4401 if (ImmVal > High) {
4402 if (!AllowSaturation)
4403 return false;
4404 ImmVal = High;
4405 }
4406
4407 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4408 return true;
4409 }
4410
4411 return false;
4412}
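// For example (bounds are hypothetical): with Low == 1, High == 64 and
// AllowSaturation set, a constant shift amount of 70 is clamped to 64, while
// a shift amount of 0 is rejected as being below Low.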
4413
4414bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4415 // tagp(FrameIndex, IRGstack, tag_offset):
4416 // since the offset between FrameIndex and IRGstack is a compile-time
4417 // constant, this can be lowered to a single ADDG instruction.
4418 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4419 return false;
4420 }
4421
4422 SDValue IRG_SP = N->getOperand(2);
4423 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4424 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4425 return false;
4426 }
4427
4428 const TargetLowering *TLI = getTargetLowering();
4429 SDLoc DL(N);
4430 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4431 SDValue FiOp = CurDAG->getTargetFrameIndex(
4432 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4433 int TagOffset = N->getConstantOperandVal(3);
4434
4435 SDNode *Out = CurDAG->getMachineNode(
4436 AArch64::TAGPstack, DL, MVT::i64,
4437 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4438 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4439 ReplaceNode(N, Out);
4440 return true;
4441}
4442
4443void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4444 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4445 "llvm.aarch64.tagp third argument must be an immediate");
4446 if (trySelectStackSlotTagP(N))
4447 return;
4448 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4449 // compile-time constant, not just for stack allocations.
4450
4451 // General case for unrelated pointers in Op1 and Op2.
4452 SDLoc DL(N);
4453 int TagOffset = N->getConstantOperandVal(3);
4454 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4455 {N->getOperand(1), N->getOperand(2)});
4456 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4457 {SDValue(N1, 0), N->getOperand(2)});
4458 SDNode *N3 = CurDAG->getMachineNode(
4459 AArch64::ADDG, DL, MVT::i64,
4460 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4461 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4462 ReplaceNode(N, N3);
4463}
4464
4465bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4466 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4467
4468 // Bail when not a "cast" like insert_subvector.
4469 if (N->getConstantOperandVal(2) != 0)
4470 return false;
4471 if (!N->getOperand(0).isUndef())
4472 return false;
4473
4474 // Bail when normal isel should do the job.
4475 EVT VT = N->getValueType(0);
4476 EVT InVT = N->getOperand(1).getValueType();
4477 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4478 return false;
4479 if (InVT.getSizeInBits() <= 128)
4480 return false;
4481
4482 // NOTE: We can only get here when doing fixed length SVE code generation.
4483 // We do manual selection because the types involved are not linked to real
4484 // registers (despite being legal) and must be coerced into SVE registers.
4485
4487 "Expected to insert into a packed scalable vector!");
4488
4489 SDLoc DL(N);
4490 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4491 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4492 N->getOperand(1), RC));
4493 return true;
4494}
4495
4496bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4497 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4498
4499 // Bail when not a "cast" like extract_subvector.
4500 if (N->getConstantOperandVal(1) != 0)
4501 return false;
4502
4503 // Bail when normal isel can do the job.
4504 EVT VT = N->getValueType(0);
4505 EVT InVT = N->getOperand(0).getValueType();
4506 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4507 return false;
4508 if (VT.getSizeInBits() <= 128)
4509 return false;
4510
4511 // NOTE: We can only get here when doing fixed length SVE code generation.
4512 // We do manual selection because the types involved are not linked to real
4513 // registers (despite being legal) and must be coerced into SVE registers.
4514
4516 "Expected to extract from a packed scalable vector!");
4517
4518 SDLoc DL(N);
4519 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4520 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4521 N->getOperand(0), RC));
4522 return true;
4523}
4524
4525bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4526 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4527
4528 SDValue N0 = N->getOperand(0);
4529 SDValue N1 = N->getOperand(1);
4530 EVT VT = N->getValueType(0);
4531
4532 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4533 // Rotate by a constant is a funnel shift in IR, which is expanded to
4534 // an OR with shifted operands.
4535 // We do the following transform:
4536 // OR N0, N1 -> xar (x, y, imm)
4537 // Where:
4538 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4539 // N0 = SHL_PRED true, V, splat(bits-imm)
4540 // V = (xor x, y)
4541 if (VT.isScalableVector() &&
4542 (Subtarget->hasSVE2() ||
4543 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4544 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4545 N1.getOpcode() != AArch64ISD::SRL_PRED)
4546 std::swap(N0, N1);
4547 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4548 N1.getOpcode() != AArch64ISD::SRL_PRED)
4549 return false;
4550
4551 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4552 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4553 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4554 return false;
4555
4556 SDValue XOR = N0.getOperand(1);
4557 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4558 return false;
4559
4560 APInt ShlAmt, ShrAmt;
4561 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4562 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4563 return false;
4564
4565 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4566 return false;
4567
4568 SDLoc DL(N);
4569 SDValue Imm =
4570 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4571
4572 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4573 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4574 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4575 AArch64::XAR_ZZZI_D})) {
4576 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4577 return true;
4578 }
4579 return false;
4580 }
4581
4582 if (!Subtarget->hasSHA3())
4583 return false;
4584
4585 if (N0->getOpcode() != AArch64ISD::VSHL ||
4586 N1->getOpcode() != AArch64ISD::VLSHR)
4587 return false;
4588
4589 if (N0->getOperand(0) != N1->getOperand(0) ||
4590 N1->getOperand(0)->getOpcode() != ISD::XOR)
4591 return false;
4592
4593 SDValue XOR = N0.getOperand(0);
4594 SDValue R1 = XOR.getOperand(0);
4595 SDValue R2 = XOR.getOperand(1);
4596
4597 unsigned HsAmt = N0.getConstantOperandVal(1);
4598 unsigned ShAmt = N1.getConstantOperandVal(1);
4599
4600 SDLoc DL = SDLoc(N0.getOperand(1));
4601 SDValue Imm = CurDAG->getTargetConstant(
4602 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4603
4604 if (ShAmt + HsAmt != 64)
4605 return false;
4606
4607 SDValue Ops[] = {R1, R2, Imm};
4608 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4609
4610 return true;
4611}
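// A worked NEON example (register numbers are hypothetical): rotating
// "xor(x, y)" right by 13 on a v2i64 value reaches this point as
// "or (AArch64ISD::VSHL v, 51), (AArch64ISD::VLSHR v, 13)" with v == xor(x, y),
// and is selected as "xar v0.2d, v1.2d, v2.2d, #13" when SHA3 is available.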
4612
4613void AArch64DAGToDAGISel::Select(SDNode *Node) {
4614 // If we have a custom node, it has already been selected.
4615 if (Node->isMachineOpcode()) {
4616 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4617 Node->setNodeId(-1);
4618 return;
4619 }
4620
4621 // A few custom selection cases.
4622 EVT VT = Node->getValueType(0);
4623
4624 switch (Node->getOpcode()) {
4625 default:
4626 break;
4627
4628 case ISD::ATOMIC_CMP_SWAP:
4629 if (SelectCMP_SWAP(Node))
4630 return;
4631 break;
4632
4633 case ISD::READ_REGISTER:
4634 case AArch64ISD::MRRS:
4635 if (tryReadRegister(Node))
4636 return;
4637 break;
4638
4639 case ISD::WRITE_REGISTER:
4640 case AArch64ISD::MSRR:
4641 if (tryWriteRegister(Node))
4642 return;
4643 break;
4644
4645 case ISD::LOAD: {
4646 // Try to select as an indexed load. Fall through to normal processing
4647 // if we can't.
4648 if (tryIndexedLoad(Node))
4649 return;
4650 break;
4651 }
4652
4653 case ISD::SRL:
4654 case ISD::AND:
4655 case ISD::SRA:
4656 case ISD::SIGN_EXTEND_INREG:
4657 if (tryBitfieldExtractOp(Node))
4658 return;
4659 if (tryBitfieldInsertInZeroOp(Node))
4660 return;
4661 [[fallthrough]];
4662 case ISD::ROTR:
4663 case ISD::SHL:
4664 if (tryShiftAmountMod(Node))
4665 return;
4666 break;
4667
4668 case ISD::SIGN_EXTEND:
4669 if (tryBitfieldExtractOpFromSExt(Node))
4670 return;
4671 break;
4672
4673 case ISD::OR:
4674 if (tryBitfieldInsertOp(Node))
4675 return;
4676 if (trySelectXAR(Node))
4677 return;
4678 break;
4679
4680 case ISD::EXTRACT_SUBVECTOR: {
4681 if (trySelectCastScalableToFixedLengthVector(Node))
4682 return;
4683 break;
4684 }
4685
4686 case ISD::INSERT_SUBVECTOR: {
4687 if (trySelectCastFixedLengthToScalableVector(Node))
4688 return;
4689 break;
4690 }
4691
4692 case ISD::Constant: {
4693 // Materialize zero constants as copies from WZR/XZR. This allows
4694 // the coalescer to propagate these into other instructions.
4695 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4696 if (ConstNode->isZero()) {
4697 if (VT == MVT::i32) {
4698 SDValue New = CurDAG->getCopyFromReg(
4699 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4700 ReplaceNode(Node, New.getNode());
4701 return;
4702 } else if (VT == MVT::i64) {
4703 SDValue New = CurDAG->getCopyFromReg(
4704 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4705 ReplaceNode(Node, New.getNode());
4706 return;
4707 }
4708 }
4709 break;
4710 }
4711
4712 case ISD::FrameIndex: {
4713 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4714 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4715 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4716 const TargetLowering *TLI = getTargetLowering();
4717 SDValue TFI = CurDAG->getTargetFrameIndex(
4718 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4719 SDLoc DL(Node);
4720 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4721 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4722 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4723 return;
4724 }
4725 case ISD::INTRINSIC_W_CHAIN: {
4726 unsigned IntNo = Node->getConstantOperandVal(1);
4727 switch (IntNo) {
4728 default:
4729 break;
4730 case Intrinsic::aarch64_gcsss: {
4731 SDLoc DL(Node);
4732 SDValue Chain = Node->getOperand(0);
4733 SDValue Val = Node->getOperand(2);
4734 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4735 SDNode *SS1 =
4736 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4737 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4738 MVT::Other, Zero, SDValue(SS1, 0));
4739 ReplaceNode(Node, SS2);
4740 return;
4741 }
4742 case Intrinsic::aarch64_ldaxp:
4743 case Intrinsic::aarch64_ldxp: {
4744 unsigned Op =
4745 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4746 SDValue MemAddr = Node->getOperand(2);
4747 SDLoc DL(Node);
4748 SDValue Chain = Node->getOperand(0);
4749
4750 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4751 MVT::Other, MemAddr, Chain);
4752
4753 // Transfer memoperands.
4754 MachineMemOperand *MemOp =
4755 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4756 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4757 ReplaceNode(Node, Ld);
4758 return;
4759 }
4760 case Intrinsic::aarch64_stlxp:
4761 case Intrinsic::aarch64_stxp: {
4762 unsigned Op =
4763 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4764 SDLoc DL(Node);
4765 SDValue Chain = Node->getOperand(0);
4766 SDValue ValLo = Node->getOperand(2);
4767 SDValue ValHi = Node->getOperand(3);
4768 SDValue MemAddr = Node->getOperand(4);
4769
4770 // Place arguments in the right order.
4771 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4772
4773 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4774 // Transfer memoperands.
4775 MachineMemOperand *MemOp =
4776 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4778
4779 ReplaceNode(Node, St);
4780 return;
4781 }
4782 case Intrinsic::aarch64_neon_ld1x2:
4783 if (VT == MVT::v8i8) {
4784 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4785 return;
4786 } else if (VT == MVT::v16i8) {
4787 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4788 return;
4789 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4790 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4791 return;
4792 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4793 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4794 return;
4795 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4796 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4797 return;
4798 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4799 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4800 return;
4801 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4802 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4803 return;
4804 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4805 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4806 return;
4807 }
4808 break;
4809 case Intrinsic::aarch64_neon_ld1x3:
4810 if (VT == MVT::v8i8) {
4811 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4812 return;
4813 } else if (VT == MVT::v16i8) {
4814 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4815 return;
4816 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4817 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4818 return;
4819 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4820 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4821 return;
4822 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4823 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4824 return;
4825 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4826 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4827 return;
4828 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4829 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4830 return;
4831 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4832 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4833 return;
4834 }
4835 break;
4836 case Intrinsic::aarch64_neon_ld1x4:
4837 if (VT == MVT::v8i8) {
4838 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4839 return;
4840 } else if (VT == MVT::v16i8) {
4841 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4842 return;
4843 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4844 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4845 return;
4846 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4847 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4848 return;
4849 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4850 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4851 return;
4852 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4853 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4854 return;
4855 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4856 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4857 return;
4858 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4859 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4860 return;
4861 }
4862 break;
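// Note: for the single-element .1d forms handled below, ld2/ld3/ld4 are
// lowered to the corresponding LD1 multi-register instructions (LD1Twov1d,
// LD1Threev1d, LD1Fourv1d), since LD2/LD3/LD4 have no .1d arrangement and
// de-interleaving one-element vectors is just a plain multi-register load.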
4863 case Intrinsic::aarch64_neon_ld2:
4864 if (VT == MVT::v8i8) {
4865 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4866 return;
4867 } else if (VT == MVT::v16i8) {
4868 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4869 return;
4870 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4871 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4872 return;
4873 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4874 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4875 return;
4876 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4877 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4878 return;
4879 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4880 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4881 return;
4882 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4883 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4884 return;
4885 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4886 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4887 return;
4888 }
4889 break;
4890 case Intrinsic::aarch64_neon_ld3:
4891 if (VT == MVT::v8i8) {
4892 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4893 return;
4894 } else if (VT == MVT::v16i8) {
4895 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4896 return;
4897 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4898 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4899 return;
4900 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4901 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4902 return;
4903 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4904 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4905 return;
4906 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4907 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4908 return;
4909 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4910 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4911 return;
4912 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4913 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4914 return;
4915 }
4916 break;
4917 case Intrinsic::aarch64_neon_ld4:
4918 if (VT == MVT::v8i8) {
4919 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4920 return;
4921 } else if (VT == MVT::v16i8) {
4922 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4923 return;
4924 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4925 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4926 return;
4927 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4928 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4929 return;
4930 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4931 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4932 return;
4933 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4934 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4935 return;
4936 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4937 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4938 return;
4939 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4940 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4941 return;
4942 }
4943 break;
4944 case Intrinsic::aarch64_neon_ld2r:
4945 if (VT == MVT::v8i8) {
4946 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4947 return;
4948 } else if (VT == MVT::v16i8) {
4949 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4950 return;
4951 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4952 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4953 return;
4954 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4955 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4956 return;
4957 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4958 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4959 return;
4960 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4961 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4962 return;
4963 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4964 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4965 return;
4966 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4967 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4968 return;
4969 }
4970 break;
4971 case Intrinsic::aarch64_neon_ld3r:
4972 if (VT == MVT::v8i8) {
4973 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4974 return;
4975 } else if (VT == MVT::v16i8) {
4976 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4977 return;
4978 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4979 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4980 return;
4981 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4982 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4983 return;
4984 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4985 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4986 return;
4987 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4988 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4989 return;
4990 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4991 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4992 return;
4993 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4994 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4995 return;
4996 }
4997 break;
4998 case Intrinsic::aarch64_neon_ld4r:
4999 if (VT == MVT::v8i8) {
5000 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5001 return;
5002 } else if (VT == MVT::v16i8) {
5003 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5004 return;
5005 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5006 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5007 return;
5008 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5009 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5010 return;
5011 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5012 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5013 return;
5014 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5015 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5016 return;
5017 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5018 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5019 return;
5020 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5021 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5022 return;
5023 }
5024 break;
5025 case Intrinsic::aarch64_neon_ld2lane:
5026 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5027 SelectLoadLane(Node, 2, AArch64::LD2i8);
5028 return;
5029 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5030 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5031 SelectLoadLane(Node, 2, AArch64::LD2i16);
5032 return;
5033 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5034 VT == MVT::v2f32) {
5035 SelectLoadLane(Node, 2, AArch64::LD2i32);
5036 return;
5037 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5038 VT == MVT::v1f64) {
5039 SelectLoadLane(Node, 2, AArch64::LD2i64);
5040 return;
5041 }
5042 break;
5043 case Intrinsic::aarch64_neon_ld3lane:
5044 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5045 SelectLoadLane(Node, 3, AArch64::LD3i8);
5046 return;
5047 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5048 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5049 SelectLoadLane(Node, 3, AArch64::LD3i16);
5050 return;
5051 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5052 VT == MVT::v2f32) {
5053 SelectLoadLane(Node, 3, AArch64::LD3i32);
5054 return;
5055 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5056 VT == MVT::v1f64) {
5057 SelectLoadLane(Node, 3, AArch64::LD3i64);
5058 return;
5059 }
5060 break;
5061 case Intrinsic::aarch64_neon_ld4lane:
5062 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5063 SelectLoadLane(Node, 4, AArch64::LD4i8);
5064 return;
5065 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5066 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5067 SelectLoadLane(Node, 4, AArch64::LD4i16);
5068 return;
5069 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5070 VT == MVT::v2f32) {
5071 SelectLoadLane(Node, 4, AArch64::LD4i32);
5072 return;
5073 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5074 VT == MVT::v1f64) {
5075 SelectLoadLane(Node, 4, AArch64::LD4i64);
5076 return;
5077 }
5078 break;
5079 case Intrinsic::aarch64_ld64b:
5080 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5081 return;
5082 case Intrinsic::aarch64_sve_ld2q_sret: {
5083 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5084 return;
5085 }
5086 case Intrinsic::aarch64_sve_ld3q_sret: {
5087 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5088 return;
5089 }
5090 case Intrinsic::aarch64_sve_ld4q_sret: {
5091 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5092 return;
5093 }
5094 case Intrinsic::aarch64_sve_ld2_sret: {
5095 if (VT == MVT::nxv16i8) {
5096 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5097 true);
5098 return;
5099 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5100 VT == MVT::nxv8bf16) {
5101 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5102 true);
5103 return;
5104 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5105 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5106 true);
5107 return;
5108 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5109 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5110 true);
5111 return;
5112 }
5113 break;
5114 }
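// The multi-vector predicated loads below are gated on the subtarget
// features: with SME2 the *_PSEUDO forms are selected (these appear to defer
// the choice between the strided and contiguous register-tuple encodings
// until after register allocation), with SVE2p1 the real LD1/LDNT1
// multi-vector instructions are used directly, and otherwise selection falls
// back to the default handling.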
5115 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5116 if (VT == MVT::nxv16i8) {
5117 if (Subtarget->hasSME2())
5118 SelectContiguousMultiVectorLoad(
5119 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5120 else if (Subtarget->hasSVE2p1())
5121 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5122 AArch64::LD1B_2Z);
5123 else
5124 break;
5125 return;
5126 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5127 VT == MVT::nxv8bf16) {
5128 if (Subtarget->hasSME2())
5129 SelectContiguousMultiVectorLoad(
5130 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5131 else if (Subtarget->hasSVE2p1())
5132 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5133 AArch64::LD1H_2Z);
5134 else
5135 break;
5136 return;
5137 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5138 if (Subtarget->hasSME2())
5139 SelectContiguousMultiVectorLoad(
5140 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5141 else if (Subtarget->hasSVE2p1())
5142 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5143 AArch64::LD1W_2Z);
5144 else
5145 break;
5146 return;
5147 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5148 if (Subtarget->hasSME2())
5149 SelectContiguousMultiVectorLoad(
5150 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5151 else if (Subtarget->hasSVE2p1())
5152 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5153 AArch64::LD1D_2Z);
5154 else
5155 break;
5156 return;
5157 }
5158 break;
5159 }
5160 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5161 if (VT == MVT::nxv16i8) {
5162 if (Subtarget->hasSME2())
5163 SelectContiguousMultiVectorLoad(
5164 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5165 else if (Subtarget->hasSVE2p1())
5166 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5167 AArch64::LD1B_4Z);
5168 else
5169 break;
5170 return;
5171 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5172 VT == MVT::nxv8bf16) {
5173 if (Subtarget->hasSME2())
5174 SelectContiguousMultiVectorLoad(
5175 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5176 else if (Subtarget->hasSVE2p1())
5177 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5178 AArch64::LD1H_4Z);
5179 else
5180 break;
5181 return;
5182 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5183 if (Subtarget->hasSME2())
5184 SelectContiguousMultiVectorLoad(
5185 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5186 else if (Subtarget->hasSVE2p1())
5187 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5188 AArch64::LD1W_4Z);
5189 else
5190 break;
5191 return;
5192 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5193 if (Subtarget->hasSME2())
5194 SelectContiguousMultiVectorLoad(
5195 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5196 else if (Subtarget->hasSVE2p1())
5197 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5198 AArch64::LD1D_4Z);
5199 else
5200 break;
5201 return;
5202 }
5203 break;
5204 }
5205 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5206 if (VT == MVT::nxv16i8) {
5207 if (Subtarget->hasSME2())
5208 SelectContiguousMultiVectorLoad(Node, 2, 0,
5209 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5210 AArch64::LDNT1B_2Z_PSEUDO);
5211 else if (Subtarget->hasSVE2p1())
5212 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5213 AArch64::LDNT1B_2Z);
5214 else
5215 break;
5216 return;
5217 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5218 VT == MVT::nxv8bf16) {
5219 if (Subtarget->hasSME2())
5220 SelectContiguousMultiVectorLoad(Node, 2, 1,
5221 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5222 AArch64::LDNT1H_2Z_PSEUDO);
5223 else if (Subtarget->hasSVE2p1())
5224 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5225 AArch64::LDNT1H_2Z);
5226 else
5227 break;
5228 return;
5229 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5230 if (Subtarget->hasSME2())
5231 SelectContiguousMultiVectorLoad(Node, 2, 2,
5232 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5233 AArch64::LDNT1W_2Z_PSEUDO);
5234 else if (Subtarget->hasSVE2p1())
5235 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5236 AArch64::LDNT1W_2Z);
5237 else
5238 break;
5239 return;
5240 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5241 if (Subtarget->hasSME2())
5242 SelectContiguousMultiVectorLoad(Node, 2, 3,
5243 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5244 AArch64::LDNT1D_2Z_PSEUDO);
5245 else if (Subtarget->hasSVE2p1())
5246 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5247 AArch64::LDNT1D_2Z);
5248 else
5249 break;
5250 return;
5251 }
5252 break;
5253 }
5254 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5255 if (VT == MVT::nxv16i8) {
5256 if (Subtarget->hasSME2())
5257 SelectContiguousMultiVectorLoad(Node, 4, 0,
5258 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5259 AArch64::LDNT1B_4Z_PSEUDO);
5260 else if (Subtarget->hasSVE2p1())
5261 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5262 AArch64::LDNT1B_4Z);
5263 else
5264 break;
5265 return;
5266 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5267 VT == MVT::nxv8bf16) {
5268 if (Subtarget->hasSME2())
5269 SelectContiguousMultiVectorLoad(Node, 4, 1,
5270 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5271 AArch64::LDNT1H_4Z_PSEUDO);
5272 else if (Subtarget->hasSVE2p1())
5273 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5274 AArch64::LDNT1H_4Z);
5275 else
5276 break;
5277 return;
5278 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5279 if (Subtarget->hasSME2())
5280 SelectContiguousMultiVectorLoad(Node, 4, 2,
5281 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5282 AArch64::LDNT1W_4Z_PSEUDO);
5283 else if (Subtarget->hasSVE2p1())
5284 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5285 AArch64::LDNT1W_4Z);
5286 else
5287 break;
5288 return;
5289 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5290 if (Subtarget->hasSME2())
5291 SelectContiguousMultiVectorLoad(Node, 4, 3,
5292 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5293 AArch64::LDNT1D_4Z_PSEUDO);
5294 else if (Subtarget->hasSVE2p1())
5295 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5296 AArch64::LDNT1D_4Z);
5297 else
5298 break;
5299 return;
5300 }
5301 break;
5302 }
5303 case Intrinsic::aarch64_sve_ld3_sret: {
5304 if (VT == MVT::nxv16i8) {
5305 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5306 true);
5307 return;
5308 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5309 VT == MVT::nxv8bf16) {
5310 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5311 true);
5312 return;
5313 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5314 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5315 true);
5316 return;
5317 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5318 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5319 true);
5320 return;
5321 }
5322 break;
5323 }
5324 case Intrinsic::aarch64_sve_ld4_sret: {
5325 if (VT == MVT::nxv16i8) {
5326 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5327 true);
5328 return;
5329 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5330 VT == MVT::nxv8bf16) {
5331 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5332 true);
5333 return;
5334 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5335 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5336 true);
5337 return;
5338 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5339 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5340 true);
5341 return;
5342 }
5343 break;
5344 }
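// For the ZA reads below, the <MaxIdx, Scale> template arguments of
// SelectMultiVectorMove bound and scale the tile-slice offset immediate for
// each element size; wider elements leave fewer slices per tile, hence the
// progressively smaller maxima (e.g. 14 for byte tiles down to 0 for
// doubleword tiles in the vg2 forms).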
5345 case Intrinsic::aarch64_sme_read_hor_vg2: {
5346 if (VT == MVT::nxv16i8) {
5347 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5348 AArch64::MOVA_2ZMXI_H_B);
5349 return;
5350 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5351 VT == MVT::nxv8bf16) {
5352 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5353 AArch64::MOVA_2ZMXI_H_H);
5354 return;
5355 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5356 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5357 AArch64::MOVA_2ZMXI_H_S);
5358 return;
5359 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5360 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5361 AArch64::MOVA_2ZMXI_H_D);
5362 return;
5363 }
5364 break;
5365 }
5366 case Intrinsic::aarch64_sme_read_ver_vg2: {
5367 if (VT == MVT::nxv16i8) {
5368 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5369 AArch64::MOVA_2ZMXI_V_B);
5370 return;
5371 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5372 VT == MVT::nxv8bf16) {
5373 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5374 AArch64::MOVA_2ZMXI_V_H);
5375 return;
5376 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5377 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5378 AArch64::MOVA_2ZMXI_V_S);
5379 return;
5380 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5381 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5382 AArch64::MOVA_2ZMXI_V_D);
5383 return;
5384 }
5385 break;
5386 }
5387 case Intrinsic::aarch64_sme_read_hor_vg4: {
5388 if (VT == MVT::nxv16i8) {
5389 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5390 AArch64::MOVA_4ZMXI_H_B);
5391 return;
5392 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5393 VT == MVT::nxv8bf16) {
5394 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5395 AArch64::MOVA_4ZMXI_H_H);
5396 return;
5397 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5398 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5399 AArch64::MOVA_4ZMXI_H_S);
5400 return;
5401 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5402 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5403 AArch64::MOVA_4ZMXI_H_D);
5404 return;
5405 }
5406 break;
5407 }
5408 case Intrinsic::aarch64_sme_read_ver_vg4: {
5409 if (VT == MVT::nxv16i8) {
5410 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5411 AArch64::MOVA_4ZMXI_V_B);
5412 return;
5413 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5414 VT == MVT::nxv8bf16) {
5415 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5416 AArch64::MOVA_4ZMXI_V_H);
5417 return;
5418 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5419 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5420 AArch64::MOVA_4ZMXI_V_S);
5421 return;
5422 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5423 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5424 AArch64::MOVA_4ZMXI_V_D);
5425 return;
5426 }
5427 break;
5428 }
5429 case Intrinsic::aarch64_sme_read_vg1x2: {
5430 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5431 AArch64::MOVA_VG2_2ZMXI);
5432 return;
5433 }
5434 case Intrinsic::aarch64_sme_read_vg1x4: {
5435 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5436 AArch64::MOVA_VG4_4ZMXI);
5437 return;
5438 }
5439 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5440 if (VT == MVT::nxv16i8) {
5441 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5442 return;
5443 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5444 VT == MVT::nxv8bf16) {
5445 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5446 return;
5447 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5448 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5449 return;
5450 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5451 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5452 return;
5453 }
5454 break;
5455 }
5456 case Intrinsic::aarch64_sme_readz_vert_x2: {
5457 if (VT == MVT::nxv16i8) {
5458 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5459 return;
5460 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5461 VT == MVT::nxv8bf16) {
5462 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5463 return;
5464 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5465 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5466 return;
5467 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5468 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5469 return;
5470 }
5471 break;
5472 }
5473 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5474 if (VT == MVT::nxv16i8) {
5475 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5476 return;
5477 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5478 VT == MVT::nxv8bf16) {
5479 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5480 return;
5481 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5482 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5483 return;
5484 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5485 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5486 return;
5487 }
5488 break;
5489 }
5490 case Intrinsic::aarch64_sme_readz_vert_x4: {
5491 if (VT == MVT::nxv16i8) {
5492 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5493 return;
5494 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5495 VT == MVT::nxv8bf16) {
5496 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5497 return;
5498 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5499 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5500 return;
5501 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5502 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5503 return;
5504 }
5505 break;
5506 }
5507 case Intrinsic::aarch64_sme_readz_x2: {
5508 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5509 AArch64::ZA);
5510 return;
5511 }
5512 case Intrinsic::aarch64_sme_readz_x4: {
5513 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5514 AArch64::ZA);
5515 return;
5516 }
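// swift_async_context_addr returns the address of the Swift async context
// slot in the extended frame record, materialized here as FP - 8; the
// function is also marked as taking the frame address and as having a Swift
// async context.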
5517 case Intrinsic::swift_async_context_addr: {
5518 SDLoc DL(Node);
5519 SDValue Chain = Node->getOperand(0);
5520 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5521 SDValue Res = SDValue(
5522 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5523 CurDAG->getTargetConstant(8, DL, MVT::i32),
5524 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5525 0);
5526 ReplaceUses(SDValue(Node, 0), Res);
5527 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5528 CurDAG->RemoveDeadNode(Node);
5529
5530 auto &MF = CurDAG->getMachineFunction();
5531 MF.getFrameInfo().setFrameAddressIsTaken(true);
5532 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5533 return;
5534 }
5535 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5536 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5537 Node->getValueType(0),
5538 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5539 AArch64::LUTI2_4ZTZI_S}))
5540 // Second Immediate must be <= 3:
5541 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5542 return;
5543 }
5544 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5545 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5546 Node->getValueType(0),
5547 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5548 // Second Immediate must be <= 1:
5549 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5550 return;
5551 }
5552 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5553 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5554 Node->getValueType(0),
5555 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5556 AArch64::LUTI2_2ZTZI_S}))
5557 // Second Immediate must be <= 7:
5558 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5559 return;
5560 }
5561 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5562 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5563 Node->getValueType(0),
5564 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5565 AArch64::LUTI4_2ZTZI_S}))
5566 // Second Immediate must be <= 3:
5567 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5568 return;
5569 }
5570 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5571 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5572 return;
5573 }
5574 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5575 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5576 Node->getValueType(0),
5577 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5578 SelectCVTIntrinsicFP8(Node, 2, Opc);
5579 return;
5580 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5581 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5582 Node->getValueType(0),
5583 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5584 SelectCVTIntrinsicFP8(Node, 2, Opc);
5585 return;
5586 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5587 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5588 Node->getValueType(0),
5589 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5590 SelectCVTIntrinsicFP8(Node, 2, Opc);
5591 return;
5592 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5593 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5594 Node->getValueType(0),
5595 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5596 SelectCVTIntrinsicFP8(Node, 2, Opc);
5597 return;
5598 }
5599 } break;
5600 case ISD::INTRINSIC_WO_CHAIN: {
5601 unsigned IntNo = Node->getConstantOperandVal(0);
5602 switch (IntNo) {
5603 default:
5604 break;
5605 case Intrinsic::aarch64_tagp:
5606 SelectTagP(Node);
5607 return;
5608
5609 case Intrinsic::ptrauth_auth:
5610 SelectPtrauthAuth(Node);
5611 return;
5612
5613 case Intrinsic::ptrauth_resign:
5614 SelectPtrauthResign(Node);
5615 return;
5616
5617 case Intrinsic::aarch64_neon_tbl2:
5618 SelectTable(Node, 2,
5619 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5620 false);
5621 return;
5622 case Intrinsic::aarch64_neon_tbl3:
5623 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5624 : AArch64::TBLv16i8Three,
5625 false);
5626 return;
5627 case Intrinsic::aarch64_neon_tbl4:
5628 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5629 : AArch64::TBLv16i8Four,
5630 false);
5631 return;
5632 case Intrinsic::aarch64_neon_tbx2:
5633 SelectTable(Node, 2,
5634 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5635 true);
5636 return;
5637 case Intrinsic::aarch64_neon_tbx3:
5638 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5639 : AArch64::TBXv16i8Three,
5640 true);
5641 return;
5642 case Intrinsic::aarch64_neon_tbx4:
5643 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5644 : AArch64::TBXv16i8Four,
5645 true);
5646 return;
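// Most of the remaining cases pick an opcode from a {B, H, S, D} table via
// SelectOpcodeFromVT, keyed on the element type of the result; a 0 entry
// means that element size has no matching instruction. For the destructive
// multi-vector intrinsics, the *_single_* variants (third argument false)
// take a single Z register as the second source, while the full multi-vector
// variants (third argument true) take a whole register tuple.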
5647 case Intrinsic::aarch64_sve_srshl_single_x2:
5648 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5649 Node->getValueType(0),
5650 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5651 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5652 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5653 return;
5654 case Intrinsic::aarch64_sve_srshl_single_x4:
5655 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5656 Node->getValueType(0),
5657 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5658 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5659 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5660 return;
5661 case Intrinsic::aarch64_sve_urshl_single_x2:
5662 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5663 Node->getValueType(0),
5664 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5665 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5666 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5667 return;
5668 case Intrinsic::aarch64_sve_urshl_single_x4:
5669 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5670 Node->getValueType(0),
5671 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5672 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5673 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5674 return;
5675 case Intrinsic::aarch64_sve_srshl_x2:
5676 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5677 Node->getValueType(0),
5678 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5679 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5680 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5681 return;
5682 case Intrinsic::aarch64_sve_srshl_x4:
5683 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5684 Node->getValueType(0),
5685 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5686 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5687 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5688 return;
5689 case Intrinsic::aarch64_sve_urshl_x2:
5690 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5691 Node->getValueType(0),
5692 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5693 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5694 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5695 return;
5696 case Intrinsic::aarch64_sve_urshl_x4:
5697 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5698 Node->getValueType(0),
5699 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5700 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5701 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5702 return;
5703 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5704 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5705 Node->getValueType(0),
5706 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5707 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5708 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5709 return;
5710 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5711 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5712 Node->getValueType(0),
5713 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5714 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5715 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5716 return;
5717 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5718 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5719 Node->getValueType(0),
5720 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5721 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5722 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5723 return;
5724 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5725 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5726 Node->getValueType(0),
5727 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5728 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5729 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5730 return;
5731 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5732 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5733 Node->getValueType(0),
5734 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5735 AArch64::FSCALE_2ZZ_D}))
5736 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5737 return;
5738 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5739 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5740 Node->getValueType(0),
5741 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5742 AArch64::FSCALE_4ZZ_D}))
5743 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5744 return;
5745 case Intrinsic::aarch64_sme_fp8_scale_x2:
5746 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5747 Node->getValueType(0),
5748 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5749 AArch64::FSCALE_2Z2Z_D}))
5750 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5751 return;
5752 case Intrinsic::aarch64_sme_fp8_scale_x4:
5753 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5754 Node->getValueType(0),
5755 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5756 AArch64::FSCALE_4Z4Z_D}))
5757 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5758 return;
5759 case Intrinsic::aarch64_sve_whilege_x2:
5760 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5761 Node->getValueType(0),
5762 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5763 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5764 SelectWhilePair(Node, Op);
5765 return;
5766 case Intrinsic::aarch64_sve_whilegt_x2:
5767 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5768 Node->getValueType(0),
5769 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5770 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5771 SelectWhilePair(Node, Op);
5772 return;
5773 case Intrinsic::aarch64_sve_whilehi_x2:
5774 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5775 Node->getValueType(0),
5776 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5777 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5778 SelectWhilePair(Node, Op);
5779 return;
5780 case Intrinsic::aarch64_sve_whilehs_x2:
5781 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5782 Node->getValueType(0),
5783 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5784 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5785 SelectWhilePair(Node, Op);
5786 return;
5787 case Intrinsic::aarch64_sve_whilele_x2:
5788 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5789 Node->getValueType(0),
5790 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5791 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5792 SelectWhilePair(Node, Op);
5793 return;
5794 case Intrinsic::aarch64_sve_whilelo_x2:
5795 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5796 Node->getValueType(0),
5797 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5798 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5799 SelectWhilePair(Node, Op);
5800 return;
5801 case Intrinsic::aarch64_sve_whilels_x2:
5802 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5803 Node->getValueType(0),
5804 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5805 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5806 SelectWhilePair(Node, Op);
5807 return;
5808 case Intrinsic::aarch64_sve_whilelt_x2:
5809 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5810 Node->getValueType(0),
5811 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5812 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5813 SelectWhilePair(Node, Op);
5814 return;
5815 case Intrinsic::aarch64_sve_smax_single_x2:
5816 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5817 Node->getValueType(0),
5818 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5819 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5820 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5821 return;
5822 case Intrinsic::aarch64_sve_umax_single_x2:
5823 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5824 Node->getValueType(0),
5825 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5826 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5827 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5828 return;
5829 case Intrinsic::aarch64_sve_fmax_single_x2:
5830 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5831 Node->getValueType(0),
5832 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5833 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5834 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5835 return;
5836 case Intrinsic::aarch64_sve_smax_single_x4:
5837 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5838 Node->getValueType(0),
5839 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5840 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5841 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5842 return;
5843 case Intrinsic::aarch64_sve_umax_single_x4:
5844 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5845 Node->getValueType(0),
5846 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5847 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5848 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5849 return;
5850 case Intrinsic::aarch64_sve_fmax_single_x4:
5851 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5852 Node->getValueType(0),
5853 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5854 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5855 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5856 return;
5857 case Intrinsic::aarch64_sve_smin_single_x2:
5858 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5859 Node->getValueType(0),
5860 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5861 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5862 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5863 return;
5864 case Intrinsic::aarch64_sve_umin_single_x2:
5865 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5866 Node->getValueType(0),
5867 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5868 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5869 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5870 return;
5871 case Intrinsic::aarch64_sve_fmin_single_x2:
5872 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5873 Node->getValueType(0),
5874 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5875 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5876 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5877 return;
5878 case Intrinsic::aarch64_sve_smin_single_x4:
5879 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5880 Node->getValueType(0),
5881 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5882 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5883 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5884 return;
5885 case Intrinsic::aarch64_sve_umin_single_x4:
5886 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5887 Node->getValueType(0),
5888 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5889 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5890 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5891 return;
5892 case Intrinsic::aarch64_sve_fmin_single_x4:
5893 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5894 Node->getValueType(0),
5895 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5896 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5897 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5898 return;
5899 case Intrinsic::aarch64_sve_smax_x2:
5900 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5901 Node->getValueType(0),
5902 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5903 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5904 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5905 return;
5906 case Intrinsic::aarch64_sve_umax_x2:
5907 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5908 Node->getValueType(0),
5909 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5910 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5911 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5912 return;
5913 case Intrinsic::aarch64_sve_fmax_x2:
5914 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5915 Node->getValueType(0),
5916 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5917 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5918 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5919 return;
5920 case Intrinsic::aarch64_sve_smax_x4:
5921 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5922 Node->getValueType(0),
5923 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5924 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5925 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5926 return;
5927 case Intrinsic::aarch64_sve_umax_x4:
5928 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5929 Node->getValueType(0),
5930 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5931 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5932 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5933 return;
5934 case Intrinsic::aarch64_sve_fmax_x4:
5935 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5936 Node->getValueType(0),
5937 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5938 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5939 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5940 return;
5941 case Intrinsic::aarch64_sme_famax_x2:
5942 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5943 Node->getValueType(0),
5944 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
5945 AArch64::FAMAX_2Z2Z_D}))
5946 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5947 return;
5948 case Intrinsic::aarch64_sme_famax_x4:
5949 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5950 Node->getValueType(0),
5951 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
5952 AArch64::FAMAX_4Z4Z_D}))
5953 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5954 return;
5955 case Intrinsic::aarch64_sme_famin_x2:
5956 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5957 Node->getValueType(0),
5958 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
5959 AArch64::FAMIN_2Z2Z_D}))
5960 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5961 return;
5962 case Intrinsic::aarch64_sme_famin_x4:
5963 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5964 Node->getValueType(0),
5965 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
5966 AArch64::FAMIN_4Z4Z_D}))
5967 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5968 return;
5969 case Intrinsic::aarch64_sve_smin_x2:
5970 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5971 Node->getValueType(0),
5972 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5973 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5974 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5975 return;
5976 case Intrinsic::aarch64_sve_umin_x2:
5977 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5978 Node->getValueType(0),
5979 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5980 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5981 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5982 return;
5983 case Intrinsic::aarch64_sve_fmin_x2:
5984 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5985 Node->getValueType(0),
5986 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5987 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5988 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5989 return;
5990 case Intrinsic::aarch64_sve_smin_x4:
5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5992 Node->getValueType(0),
5993 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5994 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5995 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5996 return;
5997 case Intrinsic::aarch64_sve_umin_x4:
5998 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5999 Node->getValueType(0),
6000 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6001 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6002 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6003 return;
6004 case Intrinsic::aarch64_sve_fmin_x4:
6005 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6006 Node->getValueType(0),
6007 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6008 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6009 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6010 return;
6011 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
6012 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6013 Node->getValueType(0),
6014 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6015 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6016 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6017 return;
6018 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
6019 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6020 Node->getValueType(0),
6021 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6022 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6023 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6024 return;
6025 case Intrinsic::aarch64_sve_fminnm_single_x2:
6026 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6027 Node->getValueType(0),
6028 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6029 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6030 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6031 return;
6032 case Intrinsic::aarch64_sve_fminnm_single_x4:
6033 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6034 Node->getValueType(0),
6035 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6036 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6037 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6038 return;
6039 case Intrinsic::aarch64_sve_fmaxnm_x2:
6040 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6041 Node->getValueType(0),
6042 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6043 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6044 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6045 return;
6046 case Intrinsic::aarch64_sve_fmaxnm_x4:
6047 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6048 Node->getValueType(0),
6049 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6050 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6051 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6052 return;
6053 case Intrinsic::aarch64_sve_fminnm_x2:
6054 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6055 Node->getValueType(0),
6056 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6057 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6058 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6059 return;
6060 case Intrinsic::aarch64_sve_fminnm_x4:
6061 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6062 Node->getValueType(0),
6063 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6064 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6065 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6066 return;
6067 case Intrinsic::aarch64_sve_fcvtzs_x2:
6068 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6069 return;
6070 case Intrinsic::aarch64_sve_scvtf_x2:
6071 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6072 return;
6073 case Intrinsic::aarch64_sve_fcvtzu_x2:
6074 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6075 return;
6076 case Intrinsic::aarch64_sve_ucvtf_x2:
6077 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6078 return;
6079 case Intrinsic::aarch64_sve_fcvtzs_x4:
6080 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6081 return;
6082 case Intrinsic::aarch64_sve_scvtf_x4:
6083 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6084 return;
6085 case Intrinsic::aarch64_sve_fcvtzu_x4:
6086 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6087 return;
6088 case Intrinsic::aarch64_sve_ucvtf_x4:
6089 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6090 return;
6091 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6092 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6093 return;
6094 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6095 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6096 return;
6097 case Intrinsic::aarch64_sve_sclamp_single_x2:
6098 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6099 Node->getValueType(0),
6100 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6101 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6102 SelectClamp(Node, 2, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_uclamp_single_x2:
6105 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6106 Node->getValueType(0),
6107 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6108 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6109 SelectClamp(Node, 2, Op);
6110 return;
6111 case Intrinsic::aarch64_sve_fclamp_single_x2:
6112 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6113 Node->getValueType(0),
6114 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6115 AArch64::FCLAMP_VG2_2Z2Z_D}))
6116 SelectClamp(Node, 2, Op);
6117 return;
6118 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6119 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6120 return;
6121 case Intrinsic::aarch64_sve_sclamp_single_x4:
6122 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6123 Node->getValueType(0),
6124 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6125 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6126 SelectClamp(Node, 4, Op);
6127 return;
6128 case Intrinsic::aarch64_sve_uclamp_single_x4:
6129 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6130 Node->getValueType(0),
6131 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6132 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6133 SelectClamp(Node, 4, Op);
6134 return;
6135 case Intrinsic::aarch64_sve_fclamp_single_x4:
6136 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6137 Node->getValueType(0),
6138 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6139 AArch64::FCLAMP_VG4_4Z4Z_D}))
6140 SelectClamp(Node, 4, Op);
6141 return;
6142 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6143 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6144 return;
6145 case Intrinsic::aarch64_sve_add_single_x2:
6146 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6147 Node->getValueType(0),
6148 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6149 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6150 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6151 return;
6152 case Intrinsic::aarch64_sve_add_single_x4:
6153 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6154 Node->getValueType(0),
6155 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6156 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6157 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6158 return;
6159 case Intrinsic::aarch64_sve_zip_x2:
6160 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6161 Node->getValueType(0),
6162 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6163 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6164 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6165 return;
6166 case Intrinsic::aarch64_sve_zipq_x2:
6167 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6168 AArch64::ZIP_VG2_2ZZZ_Q);
6169 return;
6170 case Intrinsic::aarch64_sve_zip_x4:
6171 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6172 Node->getValueType(0),
6173 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6174 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6175 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6176 return;
6177 case Intrinsic::aarch64_sve_zipq_x4:
6178 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6179 AArch64::ZIP_VG4_4Z4Z_Q);
6180 return;
6181 case Intrinsic::aarch64_sve_uzp_x2:
6182 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6183 Node->getValueType(0),
6184 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6185 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6186 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_uzpq_x2:
6189 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6190 AArch64::UZP_VG2_2ZZZ_Q);
6191 return;
6192 case Intrinsic::aarch64_sve_uzp_x4:
6193 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6194 Node->getValueType(0),
6195 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6196 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6197 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6198 return;
6199 case Intrinsic::aarch64_sve_uzpq_x4:
6200 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6201 AArch64::UZP_VG4_4Z4Z_Q);
6202 return;
6203 case Intrinsic::aarch64_sve_sel_x2:
6204 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6205 Node->getValueType(0),
6206 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6207 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6208 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6209 return;
6210 case Intrinsic::aarch64_sve_sel_x4:
6211 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6212 Node->getValueType(0),
6213 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6214 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6215 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6216 return;
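// Only the .S forms of the multi-vector FRINT instructions are available, so
// the cases below use the fixed 32-bit opcodes and rely on SelectFrintFromVT
// to check the result type.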
6217 case Intrinsic::aarch64_sve_frinta_x2:
6218 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6219 return;
6220 case Intrinsic::aarch64_sve_frinta_x4:
6221 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6222 return;
6223 case Intrinsic::aarch64_sve_frintm_x2:
6224 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6225 return;
6226 case Intrinsic::aarch64_sve_frintm_x4:
6227 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6228 return;
6229 case Intrinsic::aarch64_sve_frintn_x2:
6230 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6231 return;
6232 case Intrinsic::aarch64_sve_frintn_x4:
6233 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6234 return;
6235 case Intrinsic::aarch64_sve_frintp_x2:
6236 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6237 return;
6238 case Intrinsic::aarch64_sve_frintp_x4:
6239 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6240 return;
6241 case Intrinsic::aarch64_sve_sunpk_x2:
6242 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6243 Node->getValueType(0),
6244 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6245 AArch64::SUNPK_VG2_2ZZ_D}))
6246 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6247 return;
6248 case Intrinsic::aarch64_sve_uunpk_x2:
6249 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6250 Node->getValueType(0),
6251 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6252 AArch64::UUNPK_VG2_2ZZ_D}))
6253 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6254 return;
6255 case Intrinsic::aarch64_sve_sunpk_x4:
6256 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6257 Node->getValueType(0),
6258 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6259 AArch64::SUNPK_VG4_4Z2Z_D}))
6260 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6261 return;
6262 case Intrinsic::aarch64_sve_uunpk_x4:
6263 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6264 Node->getValueType(0),
6265 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6266 AArch64::UUNPK_VG4_4Z2Z_D}))
6267 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6268 return;
6269 case Intrinsic::aarch64_sve_pext_x2: {
6270 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6271 Node->getValueType(0),
6272 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6273 AArch64::PEXT_2PCI_D}))
6274 SelectPExtPair(Node, Op);
6275 return;
6276 }
6277 }
6278 break;
6279 }
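// For the void (store-like) intrinsics below, the node produces no value
// other than the chain, so the vector type used for selection is taken from
// operand 2, the first stored value.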
6280 case ISD::INTRINSIC_VOID: {
6281 unsigned IntNo = Node->getConstantOperandVal(1);
6282 if (Node->getNumOperands() >= 3)
6283 VT = Node->getOperand(2)->getValueType(0);
6284 switch (IntNo) {
6285 default:
6286 break;
6287 case Intrinsic::aarch64_neon_st1x2: {
6288 if (VT == MVT::v8i8) {
6289 SelectStore(Node, 2, AArch64::ST1Twov8b);
6290 return;
6291 } else if (VT == MVT::v16i8) {
6292 SelectStore(Node, 2, AArch64::ST1Twov16b);
6293 return;
6294 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6295 VT == MVT::v4bf16) {
6296 SelectStore(Node, 2, AArch64::ST1Twov4h);
6297 return;
6298 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6299 VT == MVT::v8bf16) {
6300 SelectStore(Node, 2, AArch64::ST1Twov8h);
6301 return;
6302 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6303 SelectStore(Node, 2, AArch64::ST1Twov2s);
6304 return;
6305 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6306 SelectStore(Node, 2, AArch64::ST1Twov4s);
6307 return;
6308 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6309 SelectStore(Node, 2, AArch64::ST1Twov2d);
6310 return;
6311 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6312 SelectStore(Node, 2, AArch64::ST1Twov1d);
6313 return;
6314 }
6315 break;
6316 }
6317 case Intrinsic::aarch64_neon_st1x3: {
6318 if (VT == MVT::v8i8) {
6319 SelectStore(Node, 3, AArch64::ST1Threev8b);
6320 return;
6321 } else if (VT == MVT::v16i8) {
6322 SelectStore(Node, 3, AArch64::ST1Threev16b);
6323 return;
6324 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6325 VT == MVT::v4bf16) {
6326 SelectStore(Node, 3, AArch64::ST1Threev4h);
6327 return;
6328 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6329 VT == MVT::v8bf16) {
6330 SelectStore(Node, 3, AArch64::ST1Threev8h);
6331 return;
6332 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6333 SelectStore(Node, 3, AArch64::ST1Threev2s);
6334 return;
6335 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6336 SelectStore(Node, 3, AArch64::ST1Threev4s);
6337 return;
6338 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6339 SelectStore(Node, 3, AArch64::ST1Threev2d);
6340 return;
6341 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6342 SelectStore(Node, 3, AArch64::ST1Threev1d);
6343 return;
6344 }
6345 break;
6346 }
6347 case Intrinsic::aarch64_neon_st1x4: {
6348 if (VT == MVT::v8i8) {
6349 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6350 return;
6351 } else if (VT == MVT::v16i8) {
6352 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6353 return;
6354 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6355 VT == MVT::v4bf16) {
6356 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6357 return;
6358 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6359 VT == MVT::v8bf16) {
6360 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6361 return;
6362 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6363 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6364 return;
6365 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6366 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6367 return;
6368 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6369 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6370 return;
6371 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6372 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6373 return;
6374 }
6375 break;
6376 }
6377 case Intrinsic::aarch64_neon_st2: {
6378 if (VT == MVT::v8i8) {
6379 SelectStore(Node, 2, AArch64::ST2Twov8b);
6380 return;
6381 } else if (VT == MVT::v16i8) {
6382 SelectStore(Node, 2, AArch64::ST2Twov16b);
6383 return;
6384 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6385 VT == MVT::v4bf16) {
6386 SelectStore(Node, 2, AArch64::ST2Twov4h);
6387 return;
6388 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6389 VT == MVT::v8bf16) {
6390 SelectStore(Node, 2, AArch64::ST2Twov8h);
6391 return;
6392 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6393 SelectStore(Node, 2, AArch64::ST2Twov2s);
6394 return;
6395 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6396 SelectStore(Node, 2, AArch64::ST2Twov4s);
6397 return;
6398 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6399 SelectStore(Node, 2, AArch64::ST2Twov2d);
6400 return;
6401 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6402 SelectStore(Node, 2, AArch64::ST1Twov1d);
6403 return;
6404 }
6405 break;
6406 }
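// The multi-structure ST2/ST3/ST4 instructions have no ".1d" arrangement, so
// the v1i64/v1f64 cases here and in the st3/st4 handlers below fall back to
// the equivalent multi-register ST1 encodings (e.g. ST1Twov1d).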
6407 case Intrinsic::aarch64_neon_st3: {
6408 if (VT == MVT::v8i8) {
6409 SelectStore(Node, 3, AArch64::ST3Threev8b);
6410 return;
6411 } else if (VT == MVT::v16i8) {
6412 SelectStore(Node, 3, AArch64::ST3Threev16b);
6413 return;
6414 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6415 VT == MVT::v4bf16) {
6416 SelectStore(Node, 3, AArch64::ST3Threev4h);
6417 return;
6418 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6419 VT == MVT::v8bf16) {
6420 SelectStore(Node, 3, AArch64::ST3Threev8h);
6421 return;
6422 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6423 SelectStore(Node, 3, AArch64::ST3Threev2s);
6424 return;
6425 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6426 SelectStore(Node, 3, AArch64::ST3Threev4s);
6427 return;
6428 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6429 SelectStore(Node, 3, AArch64::ST3Threev2d);
6430 return;
6431 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6432 SelectStore(Node, 3, AArch64::ST1Threev1d);
6433 return;
6434 }
6435 break;
6436 }
6437 case Intrinsic::aarch64_neon_st4: {
6438 if (VT == MVT::v8i8) {
6439 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6440 return;
6441 } else if (VT == MVT::v16i8) {
6442 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6443 return;
6444 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6445 VT == MVT::v4bf16) {
6446 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6447 return;
6448 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6449 VT == MVT::v8bf16) {
6450 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6451 return;
6452 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6453 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6454 return;
6455 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6456 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6457 return;
6458 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6459 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6460 return;
6461 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6462 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6463 return;
6464 }
6465 break;
6466 }
6467 case Intrinsic::aarch64_neon_st2lane: {
6468 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6469 SelectStoreLane(Node, 2, AArch64::ST2i8);
6470 return;
6471 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6472 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6473 SelectStoreLane(Node, 2, AArch64::ST2i16);
6474 return;
6475 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6476 VT == MVT::v2f32) {
6477 SelectStoreLane(Node, 2, AArch64::ST2i32);
6478 return;
6479 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6480 VT == MVT::v1f64) {
6481 SelectStoreLane(Node, 2, AArch64::ST2i64);
6482 return;
6483 }
6484 break;
6485 }
6486 case Intrinsic::aarch64_neon_st3lane: {
6487 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6488 SelectStoreLane(Node, 3, AArch64::ST3i8);
6489 return;
6490 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6491 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6492 SelectStoreLane(Node, 3, AArch64::ST3i16);
6493 return;
6494 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6495 VT == MVT::v2f32) {
6496 SelectStoreLane(Node, 3, AArch64::ST3i32);
6497 return;
6498 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6499 VT == MVT::v1f64) {
6500 SelectStoreLane(Node, 3, AArch64::ST3i64);
6501 return;
6502 }
6503 break;
6504 }
6505 case Intrinsic::aarch64_neon_st4lane: {
6506 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6507 SelectStoreLane(Node, 4, AArch64::ST4i8);
6508 return;
6509 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6510 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6511 SelectStoreLane(Node, 4, AArch64::ST4i16);
6512 return;
6513 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6514 VT == MVT::v2f32) {
6515 SelectStoreLane(Node, 4, AArch64::ST4i32);
6516 return;
6517 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6518 VT == MVT::v1f64) {
6519 SelectStoreLane(Node, 4, AArch64::ST4i64);
6520 return;
6521 }
6522 break;
6523 }
6524 case Intrinsic::aarch64_sve_st2q: {
6525 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6526 return;
6527 }
6528 case Intrinsic::aarch64_sve_st3q: {
6529 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6530 return;
6531 }
6532 case Intrinsic::aarch64_sve_st4q: {
6533 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6534 return;
6535 }
6536 case Intrinsic::aarch64_sve_st2: {
6537 if (VT == MVT::nxv16i8) {
6538 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6539 return;
6540 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6541 VT == MVT::nxv8bf16) {
6542 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6543 return;
6544 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6545 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6546 return;
6547 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6548 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6549 return;
6550 }
6551 break;
6552 }
6553 case Intrinsic::aarch64_sve_st3: {
6554 if (VT == MVT::nxv16i8) {
6555 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6556 return;
6557 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6558 VT == MVT::nxv8bf16) {
6559 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6560 return;
6561 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6562 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6563 return;
6564 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6565 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6566 return;
6567 }
6568 break;
6569 }
6570 case Intrinsic::aarch64_sve_st4: {
6571 if (VT == MVT::nxv16i8) {
6572 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6573 return;
6574 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6575 VT == MVT::nxv8bf16) {
6576 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6577 return;
6578 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6579 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6580 return;
6581 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6582 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6583 return;
6584 }
6585 break;
6586 }
6587 }
6588 break;
6589 }
6590 case AArch64ISD::LD2post: {
6591 if (VT == MVT::v8i8) {
6592 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6593 return;
6594 } else if (VT == MVT::v16i8) {
6595 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6596 return;
6597 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6598 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6599 return;
6600 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6601 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6602 return;
6603 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6604 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6605 return;
6606 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6607 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6608 return;
6609 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6610 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6611 return;
6612 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6613 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6614 return;
6615 }
6616 break;
6617 }
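// In these post-increment load cases the final argument to SelectPostLoad is
// the sub-register index of the first vector in the result tuple: dsub0 for the
// 64-bit (D register) arrangements and qsub0 for the 128-bit (Q register) ones.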
6618 case AArch64ISD::LD3post: {
6619 if (VT == MVT::v8i8) {
6620 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6621 return;
6622 } else if (VT == MVT::v16i8) {
6623 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6624 return;
6625 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6626 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6627 return;
6628 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6629 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6630 return;
6631 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6632 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6633 return;
6634 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6635 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6636 return;
6637 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6638 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6639 return;
6640 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6641 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6642 return;
6643 }
6644 break;
6645 }
6646 case AArch64ISD::LD4post: {
6647 if (VT == MVT::v8i8) {
6648 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6649 return;
6650 } else if (VT == MVT::v16i8) {
6651 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6652 return;
6653 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6654 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6655 return;
6656 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6657 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6658 return;
6659 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6660 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6661 return;
6662 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6663 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6664 return;
6665 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6666 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6667 return;
6668 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6669 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6670 return;
6671 }
6672 break;
6673 }
6674 case AArch64ISD::LD1x2post: {
6675 if (VT == MVT::v8i8) {
6676 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6677 return;
6678 } else if (VT == MVT::v16i8) {
6679 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6680 return;
6681 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6682 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6683 return;
6684 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6685 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6686 return;
6687 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6688 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6689 return;
6690 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6691 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6692 return;
6693 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6694 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6695 return;
6696 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6697 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6698 return;
6699 }
6700 break;
6701 }
6702 case AArch64ISD::LD1x3post: {
6703 if (VT == MVT::v8i8) {
6704 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6705 return;
6706 } else if (VT == MVT::v16i8) {
6707 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6708 return;
6709 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6710 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6711 return;
6712 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6713 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6714 return;
6715 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6716 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6717 return;
6718 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6719 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6720 return;
6721 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6722 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6723 return;
6724 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6725 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6726 return;
6727 }
6728 break;
6729 }
6730 case AArch64ISD::LD1x4post: {
6731 if (VT == MVT::v8i8) {
6732 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6733 return;
6734 } else if (VT == MVT::v16i8) {
6735 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6736 return;
6737 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6738 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6739 return;
6740 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6741 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6742 return;
6743 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6744 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6745 return;
6746 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6747 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6748 return;
6749 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6750 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6751 return;
6752 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6753 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6754 return;
6755 }
6756 break;
6757 }
6758 case AArch64ISD::LD1DUPpost: {
6759 if (VT == MVT::v8i8) {
6760 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6761 return;
6762 } else if (VT == MVT::v16i8) {
6763 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6764 return;
6765 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6766 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6767 return;
6768 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6769 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6770 return;
6771 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6772 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6773 return;
6774 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6775 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6776 return;
6777 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6778 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6779 return;
6780 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6781 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6782 return;
6783 }
6784 break;
6785 }
6786 case AArch64ISD::LD2DUPpost: {
6787 if (VT == MVT::v8i8) {
6788 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6789 return;
6790 } else if (VT == MVT::v16i8) {
6791 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6792 return;
6793 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6794 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6795 return;
6796 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6797 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6798 return;
6799 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6800 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6801 return;
6802 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6803 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6804 return;
6805 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6806 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6807 return;
6808 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6809 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6810 return;
6811 }
6812 break;
6813 }
6814 case AArch64ISD::LD3DUPpost: {
6815 if (VT == MVT::v8i8) {
6816 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6817 return;
6818 } else if (VT == MVT::v16i8) {
6819 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6820 return;
6821 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6822 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6823 return;
6824 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6825 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6826 return;
6827 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6828 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6829 return;
6830 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6831 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6832 return;
6833 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6834 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6835 return;
6836 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6837 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6838 return;
6839 }
6840 break;
6841 }
6842 case AArch64ISD::LD4DUPpost: {
6843 if (VT == MVT::v8i8) {
6844 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6845 return;
6846 } else if (VT == MVT::v16i8) {
6847 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6848 return;
6849 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6850 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6851 return;
6852 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6853 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6854 return;
6855 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6856 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6857 return;
6858 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6859 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6860 return;
6861 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6862 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6863 return;
6864 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6865 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6866 return;
6867 }
6868 break;
6869 }
6870 case AArch64ISD::LD1LANEpost: {
6871 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6872 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6873 return;
6874 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6875 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6876 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6877 return;
6878 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6879 VT == MVT::v2f32) {
6880 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6881 return;
6882 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6883 VT == MVT::v1f64) {
6884 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6885 return;
6886 }
6887 break;
6888 }
6889 case AArch64ISD::LD2LANEpost: {
6890 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6891 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6892 return;
6893 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6894 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6895 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6896 return;
6897 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6898 VT == MVT::v2f32) {
6899 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6900 return;
6901 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6902 VT == MVT::v1f64) {
6903 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6904 return;
6905 }
6906 break;
6907 }
6908 case AArch64ISD::LD3LANEpost: {
6909 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6910 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6911 return;
6912 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6913 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6914 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6915 return;
6916 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6917 VT == MVT::v2f32) {
6918 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6919 return;
6920 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6921 VT == MVT::v1f64) {
6922 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6923 return;
6924 }
6925 break;
6926 }
6927 case AArch64ISD::LD4LANEpost: {
6928 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6929 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6930 return;
6931 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6932 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6933 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6934 return;
6935 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6936 VT == MVT::v2f32) {
6937 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6938 return;
6939 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6940 VT == MVT::v1f64) {
6941 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6942 return;
6943 }
6944 break;
6945 }
6946 case AArch64ISD::ST2post: {
6947 VT = Node->getOperand(1).getValueType();
6948 if (VT == MVT::v8i8) {
6949 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6950 return;
6951 } else if (VT == MVT::v16i8) {
6952 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6953 return;
6954 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6955 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6956 return;
6957 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6958 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6959 return;
6960 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6961 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6962 return;
6963 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6964 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6965 return;
6966 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6967 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6968 return;
6969 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6970 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6971 return;
6972 }
6973 break;
6974 }
6975 case AArch64ISD::ST3post: {
6976 VT = Node->getOperand(1).getValueType();
6977 if (VT == MVT::v8i8) {
6978 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6979 return;
6980 } else if (VT == MVT::v16i8) {
6981 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6982 return;
6983 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6984 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6985 return;
6986 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6987 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6988 return;
6989 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6990 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6991 return;
6992 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6993 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6994 return;
6995 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6996 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6997 return;
6998 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6999 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7000 return;
7001 }
7002 break;
7003 }
7004 case AArch64ISD::ST4post: {
7005 VT = Node->getOperand(1).getValueType();
7006 if (VT == MVT::v8i8) {
7007 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7008 return;
7009 } else if (VT == MVT::v16i8) {
7010 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7011 return;
7012 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7013 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7014 return;
7015 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7016 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7017 return;
7018 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7019 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7020 return;
7021 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7022 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7023 return;
7024 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7025 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7026 return;
7027 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7028 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7029 return;
7030 }
7031 break;
7032 }
7033 case AArch64ISD::ST1x2post: {
7034 VT = Node->getOperand(1).getValueType();
7035 if (VT == MVT::v8i8) {
7036 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7037 return;
7038 } else if (VT == MVT::v16i8) {
7039 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7040 return;
7041 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7042 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7043 return;
7044 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7045 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7046 return;
7047 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7048 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7049 return;
7050 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7051 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7052 return;
7053 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7054 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7055 return;
7056 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7057 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7058 return;
7059 }
7060 break;
7061 }
7062 case AArch64ISD::ST1x3post: {
7063 VT = Node->getOperand(1).getValueType();
7064 if (VT == MVT::v8i8) {
7065 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7066 return;
7067 } else if (VT == MVT::v16i8) {
7068 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7069 return;
7070 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7071 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7072 return;
7073 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7074 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7075 return;
7076 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7077 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7078 return;
7079 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7080 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7081 return;
7082 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7083 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7084 return;
7085 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7086 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7087 return;
7088 }
7089 break;
7090 }
7091 case AArch64ISD::ST1x4post: {
7092 VT = Node->getOperand(1).getValueType();
7093 if (VT == MVT::v8i8) {
7094 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7095 return;
7096 } else if (VT == MVT::v16i8) {
7097 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7098 return;
7099 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7100 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7101 return;
7102 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7103 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7104 return;
7105 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7106 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7107 return;
7108 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7109 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7110 return;
7111 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7112 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7113 return;
7114 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7115 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7116 return;
7117 }
7118 break;
7119 }
7120 case AArch64ISD::ST2LANEpost: {
7121 VT = Node->getOperand(1).getValueType();
7122 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7123 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7124 return;
7125 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7126 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7127 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7128 return;
7129 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7130 VT == MVT::v2f32) {
7131 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7132 return;
7133 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7134 VT == MVT::v1f64) {
7135 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7136 return;
7137 }
7138 break;
7139 }
7140 case AArch64ISD::ST3LANEpost: {
7141 VT = Node->getOperand(1).getValueType();
7142 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7143 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7144 return;
7145 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7146 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7147 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7148 return;
7149 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7150 VT == MVT::v2f32) {
7151 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7152 return;
7153 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7154 VT == MVT::v1f64) {
7155 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7156 return;
7157 }
7158 break;
7159 }
7160 case AArch64ISD::ST4LANEpost: {
7161 VT = Node->getOperand(1).getValueType();
7162 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7163 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7164 return;
7165 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7166 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7167 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7168 return;
7169 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7170 VT == MVT::v2f32) {
7171 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7172 return;
7173 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7174 VT == MVT::v1f64) {
7175 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7176 return;
7177 }
7178 break;
7179 }
7180 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7181 if (VT == MVT::nxv16i8) {
7182 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7183 return;
7184 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7185 VT == MVT::nxv8bf16) {
7186 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7187 return;
7188 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7189 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7190 return;
7191 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7192 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7193 return;
7194 }
7195 break;
7196 }
7197 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7198 if (VT == MVT::nxv16i8) {
7199 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7200 return;
7201 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7202 VT == MVT::nxv8bf16) {
7203 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7204 return;
7205 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7206 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7207 return;
7208 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7209 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7210 return;
7211 }
7212 break;
7213 }
7214 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7215 if (VT == MVT::nxv16i8) {
7216 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7217 return;
7218 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7219 VT == MVT::nxv8bf16) {
7220 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7221 return;
7222 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7223 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7224 return;
7225 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7226 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7227 return;
7228 }
7229 break;
7230 }
7231 }
7232
7233 // Select the default instruction
7234 SelectCode(Node);
7235}
7236
7237 /// createAArch64ISelDag - This pass converts a legalized DAG into an
7238 /// AArch64-specific DAG, ready for instruction scheduling.
7239 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7240 CodeGenOptLevel OptLevel) {
7241 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7242}
7243
7244 /// When \p PredVT is a scalable vector predicate in the form
7245 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7246 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7247 /// structured vectors (NumVec > 1), the output data type is
7248 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7249 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7250 /// EVT.
7251 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7252 unsigned NumVec) {
7253 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7254 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7255 return EVT();
7256
7257 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7258 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7259 return EVT();
7260
7261 ElementCount EC = PredVT.getVectorElementCount();
7262 EVT ScalarVT =
7263 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7264 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7265
7266 return MemVT;
7267}
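// For example, a predicate of type nxv4i1 maps to 128 / 4 = 32-bit elements,
// so NumVec == 1 produces MVT::nxv4i32 and NumVec == 2 produces MVT::nxv8i32.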
7268
7269 /// Return the EVT of the data associated with a memory operation in \p
7270 /// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
7271 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7272 if (isa<MemSDNode>(Root))
7273 return cast<MemSDNode>(Root)->getMemoryVT();
7274
7275 if (isa<MemIntrinsicSDNode>(Root))
7276 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7277
7278 const unsigned Opcode = Root->getOpcode();
7279 // For custom ISD nodes, we have to look at them individually to extract the
7280 // type of the data moved to/from memory.
7281 switch (Opcode) {
7282 case AArch64ISD::LD1_MERGE_ZERO:
7283 case AArch64ISD::LD1S_MERGE_ZERO:
7284 case AArch64ISD::LDNF1_MERGE_ZERO:
7285 case AArch64ISD::LDNF1S_MERGE_ZERO:
7286 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7287 case AArch64ISD::ST1_PRED:
7288 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7289 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7290 return getPackedVectorTypeFromPredicateType(
7291 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7292 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7293 return getPackedVectorTypeFromPredicateType(
7294 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7295 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7296 return getPackedVectorTypeFromPredicateType(
7297 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7298 default:
7299 break;
7300 }
7301
7302 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7303 return EVT();
7304
7305 switch (Root->getConstantOperandVal(1)) {
7306 default:
7307 return EVT();
7308 case Intrinsic::aarch64_sme_ldr:
7309 case Intrinsic::aarch64_sme_str:
7310 return MVT::nxv16i8;
7311 case Intrinsic::aarch64_sve_prf:
7312 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7313 // width of the predicate.
7314 return getPackedVectorTypeFromPredicateType(
7315 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7316 case Intrinsic::aarch64_sve_ld2_sret:
7317 case Intrinsic::aarch64_sve_ld2q_sret:
7318 return getPackedVectorTypeFromPredicateType(
7319 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7320 case Intrinsic::aarch64_sve_st2q:
7321 return getPackedVectorTypeFromPredicateType(
7322 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7323 case Intrinsic::aarch64_sve_ld3_sret:
7324 case Intrinsic::aarch64_sve_ld3q_sret:
7325 return getPackedVectorTypeFromPredicateType(
7326 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7327 case Intrinsic::aarch64_sve_st3q:
7328 return getPackedVectorTypeFromPredicateType(
7329 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7330 case Intrinsic::aarch64_sve_ld4_sret:
7331 case Intrinsic::aarch64_sve_ld4q_sret:
7332 return getPackedVectorTypeFromPredicateType(
7333 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7334 case Intrinsic::aarch64_sve_st4q:
7335 return getPackedVectorTypeFromPredicateType(
7336 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7337 case Intrinsic::aarch64_sve_ld1udq:
7338 case Intrinsic::aarch64_sve_st1dq:
7339 return EVT(MVT::nxv1i64);
7340 case Intrinsic::aarch64_sve_ld1uwq:
7341 case Intrinsic::aarch64_sve_st1wq:
7342 return EVT(MVT::nxv1i32);
7343 }
7344}
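// For example, for Intrinsic::aarch64_sve_ld2_sret whose predicate operand
// (operand 2) has type nxv8i1, the memory VT is nxv16i16: 16-bit elements
// derived from the predicate width, with the element count doubled for the
// two-vector structure.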
7345
7346/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7347 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7348/// where Root is the memory access using N for its address.
7349template <int64_t Min, int64_t Max>
7350bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7351 SDValue &Base,
7352 SDValue &OffImm) {
7353 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7354 const DataLayout &DL = CurDAG->getDataLayout();
7355 const MachineFrameInfo &MFI = MF->getFrameInfo();
7356
7357 if (N.getOpcode() == ISD::FrameIndex) {
7358 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7359 // We can only encode VL scaled offsets, so only fold in frame indexes
7360 // referencing SVE objects.
7361 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7362 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7363 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7364 return true;
7365 }
7366
7367 return false;
7368 }
7369
7370 if (MemVT == EVT())
7371 return false;
7372
7373 if (N.getOpcode() != ISD::ADD)
7374 return false;
7375
7376 SDValue VScale = N.getOperand(1);
7377 if (VScale.getOpcode() != ISD::VSCALE)
7378 return false;
7379
7380 TypeSize TS = MemVT.getSizeInBits();
7381 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7382 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7383
7384 if ((MulImm % MemWidthBytes) != 0)
7385 return false;
7386
7387 int64_t Offset = MulImm / MemWidthBytes;
7388 if (Offset < Min || Offset > Max)
7389 return false;
7390
7391 Base = N.getOperand(0);
7392 if (Base.getOpcode() == ISD::FrameIndex) {
7393 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7394 // We can only encode VL scaled offsets, so only fold in frame indexes
7395 // referencing SVE objects.
7396 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7397 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7398 }
7399
7400 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7401 return true;
7402}
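// For example, with MemVT == nxv4i32 (a 16-byte minimum-size block) and an
// address of the form (add Base, (vscale 48)), MulImm is 48, which is a
// multiple of 16, so the node matches with OffImm == 3 provided 3 lies in
// [Min, Max].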
7403
7404/// Select register plus register addressing mode for SVE, with scaled
7405/// offset.
7406bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7407 SDValue &Base,
7408 SDValue &Offset) {
7409 if (N.getOpcode() != ISD::ADD)
7410 return false;
7411
7412 // Process an ADD node.
7413 const SDValue LHS = N.getOperand(0);
7414 const SDValue RHS = N.getOperand(1);
7415
7416 // 8 bit data does not come with the SHL node, so it is treated
7417 // separately.
7418 if (Scale == 0) {
7419 Base = LHS;
7420 Offset = RHS;
7421 return true;
7422 }
7423
7424 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7425 int64_t ImmOff = C->getSExtValue();
7426 unsigned Size = 1 << Scale;
7427
7428 // To use the reg+reg addressing mode, the immediate must be a multiple of
7429 // the vector element's byte size.
7430 if (ImmOff % Size)
7431 return false;
7432
7433 SDLoc DL(N);
7434 Base = LHS;
7435 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7436 SDValue Ops[] = {Offset};
7437 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7438 Offset = SDValue(MI, 0);
7439 return true;
7440 }
7441
7442 // Check if the RHS is a shift node with a constant.
7443 if (RHS.getOpcode() != ISD::SHL)
7444 return false;
7445
7446 const SDValue ShiftRHS = RHS.getOperand(1);
7447 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7448 if (C->getZExtValue() == Scale) {
7449 Base = LHS;
7450 Offset = RHS.getOperand(0);
7451 return true;
7452 }
7453
7454 return false;
7455}
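// For example, with Scale == 2 (word elements) an address of the form
// (add x, (shl y, 2)) selects Base == x and Offset == y, matching the
// [x, y, lsl #2] form, while a constant offset such as 32 (a multiple of 4)
// is scaled down to 8 and materialised into a register with MOVi64imm.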
7456
7457bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7458 const AArch64TargetLowering *TLI =
7459 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7460
7461 return TLI->isAllActivePredicate(*CurDAG, N);
7462}
7463
7464bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7465 EVT VT = N.getValueType();
7466 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7467}
7468
7469bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7470 SDValue &Base, SDValue &Offset,
7471 unsigned Scale) {
7472 // Try to untangle an ADD node into a 'reg + offset'
7473 if (CurDAG->isBaseWithConstantOffset(N))
7474 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7475 int64_t ImmOff = C->getSExtValue();
7476 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7477 Base = N.getOperand(0);
7478 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7479 return true;
7480 }
7481 }
7482
7483 // By default, just match reg + 0.
7484 Base = N;
7485 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7486 return true;
7487}
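// For example, with Scale == 2 and MaxSize >= 6, a slice address of the form
// (add base, 6) selects Base == base and an immediate offset of 3; anything
// that does not match the constrained reg+imm form falls back to Base == N
// with a zero offset.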
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:204
support::ulittle16_t & Hi
Definition: aarch32.cpp:203
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1479
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1434
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:456
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
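Value::getPointerAlignment is how IR-level alignment information reaches the selector, for example when deciding whether an alignment-sensitive instruction may be used. A minimal, illustrative sketch (the helper name and the 16-byte threshold are assumptions):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Query the best-known alignment of an IR pointer (from attributes, globals,
// or the data layout) and compare it against a 16-byte requirement.
static bool isAtLeast16ByteAligned(const Value *Ptr, const DataLayout &DL) {
  return Ptr->getPointerAlignment(DL).value() >= 16;
}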
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr and imms are 6-bit values) into the integer value it represents with regSize bits.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of a logical instruction for the given register size; if so, return true and set Encoding to the "N:immr:imms" encoding.
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm is the 6-bit shift amount; shifter is 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
static constexpr unsigned SVEBitsPerBlock
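The AArch64_AM helpers above are the immediate-encoding toolkit for logical and shifted-register operands. A minimal sketch of their typical use (assuming the backend-internal header MCTargetDesc/AArch64AddressingModes.h is on the include path, as it is inside the AArch64 target; the helpers shown here are illustrative, not code from this file):

#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

// Try to encode a 64-bit value as an AND/ORR/EOR "N:immr:imms" immediate.
static bool tryEncodeLogicalImm64(uint64_t Imm, uint64_t &Encoding) {
  // Values with no run-of-ones structure (e.g. 0 and ~0) are rejected.
  if (!AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64))
    return false;
  bool OK = AArch64_AM::processLogicalImmediate(Imm, 64, Encoding);
  // decodeLogicalImmediate is the inverse of the encoding above.
  assert(OK && AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Imm);
  return OK;
}

// Shifted-register operands pack the shift kind and amount into one field.
static unsigned lslTwelve() {
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL &&
         AArch64_AM::getShiftValue(Shifter) == 12);
  return Shifter;
}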
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store instruction.
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type.
Definition: ISDOpcodes.h:954
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the trailing bits are known to be zero.
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:215
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the named register global variables extension.
Definition: ISDOpcodes.h:124
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a scalable vector.
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi, swapLo, swapHi).
Definition: ISDOpcodes.h:1319
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a larger integer register (e.g. sign extending the low 8 bits of a 32-bit register to fill the top 24 bits with the sign bit of the low 8 bits).
Definition: ISDOpcodes.h:849
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:190
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF), or returns VAL otherwise.
Definition: ISDOpcodes.h:223
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:198
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1551
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1582
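Several of the opcodes above (AssertSext, AssertZext, AssertAlign) only annotate a value with extra facts; pattern-matching code frequently looks through them before testing the interesting opcode. An illustrative sketch (the helper name is not from this file):

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Skip value-annotating wrapper nodes; operand 0 carries the actual value.
static SDValue peekThroughAsserts(SDValue V) {
  while (V.getOpcode() == ISD::AssertSext ||
         V.getOpcode() == ISD::AssertZext ||
         V.getOpcode() == ISD::AssertAlign)
    V = V.getOperand(0);
  return V;
}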
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:285
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version).
Definition: MathExtras.h:273
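The bit-scanning helpers above (isShiftedMask_64, countr_zero, countr_one, isMask_64) are what turn a mask constant back into a (position, width) pair, the key step when forming a bitfield extract such as UBFX from shift-and-mask patterns. A minimal, illustrative sketch (not the exact logic of this file):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

// If Mask is a single contiguous run of ones, report where it starts (Lsb)
// and how long it is (Width); otherwise return false.
static bool getMaskedField(uint64_t Mask, unsigned &Lsb, unsigned &Width) {
  if (!isShiftedMask_64(Mask))
    return false;
  Lsb = llvm::countr_zero(Mask);         // index of the lowest set bit
  Width = llvm::countr_one(Mask >> Lsb); // length of the run of ones
  return true;
}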
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG, ready for instruction scheduling.
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
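The EVT queries above distinguish fixed-length NEON types from scalable SVE types, a recurring decision in this selector, while KnownBits::getBitWidth simply reports the width of the value that computeKnownBits analyzed. A minimal, illustrative sketch of the type queries (the 128-bit figure corresponds to AArch64::SVEBitsPerBlock, the minimum SVE register size):

#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Does VT describe exactly one vector register's worth of data?
static bool fillsOneVectorRegister(EVT VT) {
  if (VT.isScalableVector())
    // Scalable types are sized in vscale units: nxv4i32 is at least
    // 4 x 32 = 128 bits, i.e. one SVE block (AArch64::SVEBitsPerBlock).
    return VT.getVectorMinNumElements() * VT.getScalarSizeInBits() == 128;
  if (VT.isFixedLengthVector())
    // Fixed vectors have an exact size; 128 bits is one NEON Q register.
    return VT.is128BitVector();
  return false;
}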