1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
 60 return SelectionDAGISel::runOnMachineFunction(MF);
 61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
 67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
 68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 1, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 2, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 4, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 8, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 16, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
134 }
135 template <unsigned Size, unsigned Max>
136 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
137 // Test if there is an appropriate addressing mode and check if the
138 // immediate fits.
139 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
140 if (Found) {
141 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
142 int64_t C = CI->getSExtValue();
143 if (C <= Max)
144 return true;
145 }
146 }
147
148 // Otherwise, base only, materialize address in register.
149 Base = N;
150 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
151 return true;
152 }
153
154 template<int Width>
155 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
156 SDValue &SignExtend, SDValue &DoShift) {
157 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
158 }
159
160 template<int Width>
161 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
162 SDValue &SignExtend, SDValue &DoShift) {
163 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
164 }
165
166 bool SelectExtractHigh(SDValue N, SDValue &Res) {
167 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
168 N = N->getOperand(0);
169 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
170 !isa<ConstantSDNode>(N->getOperand(1)))
171 return false;
172 EVT VT = N->getValueType(0);
173 EVT LVT = N->getOperand(0).getValueType();
174 unsigned Index = N->getConstantOperandVal(1);
175 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
176 Index != VT.getVectorNumElements())
177 return false;
178 Res = N->getOperand(0);
179 return true;
180 }
181
182 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
183 if (N.getOpcode() != AArch64ISD::VLSHR)
184 return false;
185 SDValue Op = N->getOperand(0);
186 EVT VT = Op.getValueType();
187 unsigned ShtAmt = N->getConstantOperandVal(1);
188 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
189 return false;
190
191 APInt Imm;
192 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0)
195 << Op.getOperand(1).getConstantOperandVal(1));
196 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
197 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0));
200 else
201 return false;
202
203 if (Imm != 1ULL << (ShtAmt - 1))
204 return false;
205
206 Res1 = Op.getOperand(0);
207 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
208 return true;
209 }
210
211 bool SelectDupZeroOrUndef(SDValue N) {
212 switch(N->getOpcode()) {
213 case ISD::UNDEF:
214 return true;
215 case AArch64ISD::DUP:
216 case ISD::SPLAT_VECTOR: {
217 auto Opnd0 = N->getOperand(0);
218 if (isNullConstant(Opnd0))
219 return true;
220 if (isNullFPConstant(Opnd0))
221 return true;
222 break;
223 }
224 default:
225 break;
226 }
227
228 return false;
229 }
230
231 bool SelectAny(SDValue) { return true; }
232
233 bool SelectDupZero(SDValue N) {
234 switch(N->getOpcode()) {
235 case AArch64ISD::DUP:
236 case ISD::SPLAT_VECTOR: {
237 auto Opnd0 = N->getOperand(0);
238 if (isNullConstant(Opnd0))
239 return true;
240 if (isNullFPConstant(Opnd0))
241 return true;
242 break;
243 }
244 }
245
246 return false;
247 }
248
249 template<MVT::SimpleValueType VT>
250 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
251 return SelectSVEAddSubImm(N, VT, Imm, Shift);
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT>
260 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVECpyDupImm(N, VT, Imm, Shift);
262 }
263
264 template <MVT::SimpleValueType VT, bool Invert = false>
265 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
266 return SelectSVELogicalImm(N, VT, Imm, Invert);
267 }
268
269 template <MVT::SimpleValueType VT>
270 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
271 return SelectSVEArithImm(N, VT, Imm);
272 }
273
274 template <unsigned Low, unsigned High, bool AllowSaturation = false>
275 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
276 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
277 }
278
279 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
280 if (N->getOpcode() != ISD::SPLAT_VECTOR)
281 return false;
282
283 EVT EltVT = N->getValueType(0).getVectorElementType();
284 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
285 /* High */ EltVT.getFixedSizeInBits(),
286 /* AllowSaturation */ true, Imm);
287 }
288
289 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
290 template<signed Min, signed Max, signed Scale, bool Shift>
291 bool SelectCntImm(SDValue N, SDValue &Imm) {
292 if (!isa<ConstantSDNode>(N))
293 return false;
294
295 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
296 if (Shift)
297 MulImm = 1LL << MulImm;
298
299 if ((MulImm % std::abs(Scale)) != 0)
300 return false;
301
302 MulImm /= Scale;
303 if ((MulImm >= Min) && (MulImm <= Max)) {
304 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
305 return true;
306 }
307
308 return false;
309 }
310
311 template <signed Max, signed Scale>
312 bool SelectEXTImm(SDValue N, SDValue &Imm) {
313 if (!isa<ConstantSDNode>(N))
314 return false;
315
316 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
317
318 if (MulImm >= 0 && MulImm <= Max) {
319 MulImm *= Scale;
320 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
321 return true;
322 }
323
324 return false;
325 }
326
327 template <unsigned BaseReg, unsigned Max>
328 bool ImmToReg(SDValue N, SDValue &Imm) {
329 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
330 uint64_t C = CI->getZExtValue();
331
332 if (C > Max)
333 return false;
334
335 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
336 return true;
337 }
338 return false;
339 }
340
341 /// Form sequences of consecutive 64/128-bit registers for use in NEON
342 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
 343 /// between 1 and 4 elements. A single element is returned unchanged;
 344 /// otherwise a REG_SEQUENCE value is returned.
 345 SDValue createDTuple(ArrayRef<SDValue> Vecs);
 346 SDValue createQTuple(ArrayRef<SDValue> Vecs);
 347 // Form a sequence of SVE registers for instructions using a list of vectors,
348 // e.g. structured loads and stores (ldN, stN).
349 SDValue createZTuple(ArrayRef<SDValue> Vecs);
350
351 // Similar to above, except the register must start at a multiple of the
352 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
353 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
354
355 /// Generic helper for the createDTuple/createQTuple
356 /// functions. Those should almost always be called instead.
357 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
358 const unsigned SubRegs[]);
359
360 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
361
362 bool tryIndexedLoad(SDNode *N);
363
364 void SelectPtrauthAuth(SDNode *N);
365 void SelectPtrauthResign(SDNode *N);
366
367 bool trySelectStackSlotTagP(SDNode *N);
368 void SelectTagP(SDNode *N);
369
370 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
371 unsigned SubRegIdx);
372 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
375 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
376 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
377 unsigned Opc_rr, unsigned Opc_ri,
378 bool IsIntr = false);
379 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
380 unsigned Scale, unsigned Opc_ri,
381 unsigned Opc_rr);
382 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
383 bool IsZmMulti, unsigned Opcode,
384 bool HasPred = false);
385 void SelectPExtPair(SDNode *N, unsigned Opc);
386 void SelectWhilePair(SDNode *N, unsigned Opc);
387 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
388 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
389 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
391 bool IsTupleInput, unsigned Opc);
392 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
393
394 template <unsigned MaxIdx, unsigned Scale>
395 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
396 unsigned Op);
397 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
398 unsigned Op, unsigned MaxIdx, unsigned Scale,
399 unsigned BaseReg = 0);
400 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401 /// SVE Reg+Imm addressing mode.
402 template <int64_t Min, int64_t Max>
403 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
404 SDValue &OffImm);
405 /// SVE Reg+Reg address mode.
406 template <unsigned Scale>
407 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
408 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
409 }
410
411 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
412 unsigned Opc, uint32_t MaxImm);
413
414 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
415
416 template <unsigned MaxIdx, unsigned Scale>
417 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
418 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
419 }
420
421 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
426 unsigned Opc_rr, unsigned Opc_ri);
427 std::tuple<unsigned, SDValue, SDValue>
428 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
429 const SDValue &OldBase, const SDValue &OldOffset,
430 unsigned Scale);
431
432 bool tryBitfieldExtractOp(SDNode *N);
433 bool tryBitfieldExtractOpFromSExt(SDNode *N);
434 bool tryBitfieldInsertOp(SDNode *N);
435 bool tryBitfieldInsertInZeroOp(SDNode *N);
436 bool tryShiftAmountMod(SDNode *N);
437
438 bool tryReadRegister(SDNode *N);
439 bool tryWriteRegister(SDNode *N);
440
441 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
442 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
443
444 bool trySelectXAR(SDNode *N);
445
446// Include the pieces autogenerated from the target description.
447#include "AArch64GenDAGISel.inc"
448
449private:
450 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
451 SDValue &Shift);
452 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
453 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
454 SDValue &OffImm) {
455 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
456 }
457 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
458 unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &Offset, SDValue &SignExtend,
466 SDValue &DoShift);
467 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
471 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
472 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
473 SDValue &Offset, SDValue &SignExtend);
474
475 template<unsigned RegWidth>
476 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
477 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
478 }
479
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
481
482 template<unsigned RegWidth>
483 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
484 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
485 }
486
487 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
488 unsigned Width);
489
490 bool SelectCMP_SWAP(SDNode *N);
491
492 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
493 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
494 bool Negate);
495 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
496 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
497
498 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
499 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
500 bool AllowSaturation, SDValue &Imm);
501
502 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
503 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
504 SDValue &Offset);
505 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
506 SDValue &Offset, unsigned Scale = 1);
507
508 bool SelectAllActivePredicate(SDValue N);
509 bool SelectAnyPredicate(SDValue N);
510};
511
512class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
513public:
514 static char ID;
515 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
516 CodeGenOptLevel OptLevel)
 517 : SelectionDAGISelLegacy(
 518 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
519};
520} // end anonymous namespace
521
522char AArch64DAGToDAGISelLegacy::ID = 0;
523
524INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
525
526/// isIntImmediate - This method tests to see if the node is a constant
527/// operand. If so Imm will receive the 32-bit value.
528static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
529 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
530 Imm = C->getZExtValue();
531 return true;
532 }
533 return false;
534}
535
536// isIntImmediate - This method tests to see if N is a constant operand.
537// If so, Imm will receive the value.
538static bool isIntImmediate(SDValue N, uint64_t &Imm) {
539 return isIntImmediate(N.getNode(), Imm);
540}
541
542// isOpcWithIntImmediate - This method tests to see if the node is a specific
543// opcode and that it has an immediate integer right operand.
544// If so, Imm will receive the 32-bit value.
545static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
546 uint64_t &Imm) {
547 return N->getOpcode() == Opc &&
548 isIntImmediate(N->getOperand(1).getNode(), Imm);
549}
550
551// isIntImmediateEq - This method tests to see if N is a constant operand that
552// is equivalent to 'ImmExpected'.
553#ifndef NDEBUG
554static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
555 uint64_t Imm;
556 if (!isIntImmediate(N.getNode(), Imm))
557 return false;
558 return Imm == ImmExpected;
559}
560#endif
561
562bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
563 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
564 std::vector<SDValue> &OutOps) {
565 switch(ConstraintID) {
566 default:
567 llvm_unreachable("Unexpected asm memory constraint");
568 case InlineAsm::ConstraintCode::m:
569 case InlineAsm::ConstraintCode::o:
570 case InlineAsm::ConstraintCode::Q:
571 // We need to make sure that this one operand does not end up in XZR, thus
572 // require the address to be in a PointerRegClass register.
573 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
574 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
575 SDLoc dl(Op);
576 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
577 SDValue NewOp =
578 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
579 dl, Op.getValueType(),
580 Op, RC), 0);
581 OutOps.push_back(NewOp);
582 return false;
583 }
584 return true;
585}
586
587/// SelectArithImmed - Select an immediate value that can be represented as
588/// a 12-bit value shifted left by either 0 or 12. If so, return true with
589/// Val set to the 12-bit value and Shift set to the shifter operand.
590bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
591 SDValue &Shift) {
592 // This function is called from the addsub_shifted_imm ComplexPattern,
593 // which lists [imm] as the list of opcode it's interested in, however
594 // we still need to check whether the operand is actually an immediate
595 // here because the ComplexPattern opcode list is only used in
596 // root-level opcode matching.
597 if (!isa<ConstantSDNode>(N.getNode()))
598 return false;
599
600 uint64_t Immed = N.getNode()->getAsZExtVal();
601 unsigned ShiftAmt;
602
603 if (Immed >> 12 == 0) {
604 ShiftAmt = 0;
605 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
606 ShiftAmt = 12;
607 Immed = Immed >> 12;
608 } else
609 return false;
610
611 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
612 SDLoc dl(N);
613 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
614 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
615 return true;
616}
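
// Illustrative examples (assumptions, not from the original comments):
//   0x123    -> Val = 0x123, Shift = LSL #0
//   0x123000 -> Val = 0x123, Shift = LSL #12 (low 12 bits clear, fits in 24)
//   0x123456 -> rejected; the value has to be materialized in a register.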
617
618/// SelectNegArithImmed - As above, but negates the value before trying to
619/// select it.
620bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
621 SDValue &Shift) {
622 // This function is called from the addsub_shifted_imm ComplexPattern,
623 // which lists [imm] as the list of opcode it's interested in, however
624 // we still need to check whether the operand is actually an immediate
625 // here because the ComplexPattern opcode list is only used in
626 // root-level opcode matching.
627 if (!isa<ConstantSDNode>(N.getNode()))
628 return false;
629
630 // The immediate operand must be a 24-bit zero-extended immediate.
631 uint64_t Immed = N.getNode()->getAsZExtVal();
632
633 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
634 // have the opposite effect on the C flag, so this pattern mustn't match under
635 // those circumstances.
636 if (Immed == 0)
637 return false;
638
639 if (N.getValueType() == MVT::i32)
640 Immed = ~((uint32_t)Immed) + 1;
641 else
642 Immed = ~Immed + 1ULL;
643 if (Immed & 0xFFFFFFFFFF000000ULL)
644 return false;
645
646 Immed &= 0xFFFFFFULL;
647 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
648 Shift);
649}
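
// Illustrative example (assumption): an i32 immediate of -5 (0xFFFFFFFB)
// negates to 5, which fits the 12-bit form, so the add/sub can be selected
// with the inverted operation and immediate 5. Zero is rejected because
// "cmp wN, #0" and "cmn wN, #0" set the C flag differently.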
650
651/// getShiftTypeForNode - Translate a shift node to the corresponding
652/// ShiftType value.
653static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
 654 switch (N.getOpcode()) {
 655 default:
 656 return AArch64_AM::InvalidShiftExtend;
 657 case ISD::SHL:
658 return AArch64_AM::LSL;
659 case ISD::SRL:
660 return AArch64_AM::LSR;
661 case ISD::SRA:
662 return AArch64_AM::ASR;
663 case ISD::ROTR:
664 return AArch64_AM::ROR;
665 }
666}
667
668static bool isMemOpOrPrefetch(SDNode *N) {
 669 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
670}
671
672/// Determine whether it is worth it to fold SHL into the addressing
673/// mode.
674static bool isWorthFoldingSHL(SDValue V) {
 675 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
676 // It is worth folding logical shift of up to three places.
677 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
678 if (!CSD)
679 return false;
680 unsigned ShiftVal = CSD->getZExtValue();
681 if (ShiftVal > 3)
682 return false;
683
684 // Check if this particular node is reused in any non-memory related
685 // operation. If yes, do not try to fold this node into the address
686 // computation, since the computation will be kept.
687 const SDNode *Node = V.getNode();
688 for (SDNode *UI : Node->users())
689 if (!isMemOpOrPrefetch(UI))
690 for (SDNode *UII : UI->users())
691 if (!isMemOpOrPrefetch(UII))
692 return false;
693 return true;
694}
695
696/// Determine whether it is worth folding V into an extended register
697/// addressing mode.
698bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
699 // Trivial if we are optimizing for code size or if there is only
700 // one use of the value.
701 if (CurDAG->shouldOptForSize() || V.hasOneUse())
702 return true;
703
704 // If a subtarget has a slow shift, folding a shift into multiple loads
705 // costs additional micro-ops.
706 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
707 return false;
708
709 // Check whether we're going to emit the address arithmetic anyway because
710 // it's used by a non-address operation.
711 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
712 return true;
713 if (V.getOpcode() == ISD::ADD) {
714 const SDValue LHS = V.getOperand(0);
715 const SDValue RHS = V.getOperand(1);
716 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
717 return true;
718 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
719 return true;
720 }
721
722 // It hurts otherwise, since the value will be reused.
723 return false;
724}
725
726/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
727/// so that more shifted-register operands can be selected.
728bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
729 SDValue &Shift) {
730 EVT VT = N.getValueType();
731 if (VT != MVT::i32 && VT != MVT::i64)
732 return false;
733
734 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
735 return false;
736 SDValue LHS = N.getOperand(0);
737 if (!LHS->hasOneUse())
738 return false;
739
740 unsigned LHSOpcode = LHS->getOpcode();
741 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
742 return false;
743
744 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
745 if (!ShiftAmtNode)
746 return false;
747
748 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
749 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
750 if (!RHSC)
751 return false;
752
753 APInt AndMask = RHSC->getAPIntValue();
754 unsigned LowZBits, MaskLen;
755 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
756 return false;
757
758 unsigned BitWidth = N.getValueSizeInBits();
759 SDLoc DL(LHS);
760 uint64_t NewShiftC;
761 unsigned NewShiftOp;
762 if (LHSOpcode == ISD::SHL) {
763 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
764 // BitWidth != LowZBits + MaskLen doesn't match the pattern
765 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
766 return false;
767
768 NewShiftC = LowZBits - ShiftAmtC;
769 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
770 } else {
771 if (LowZBits == 0)
772 return false;
773
774 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
775 NewShiftC = LowZBits + ShiftAmtC;
776 if (NewShiftC >= BitWidth)
777 return false;
778
779 // SRA need all high bits
780 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
781 return false;
782
783 // SRL high bits can be 0 or 1
784 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
785 return false;
786
787 if (LHSOpcode == ISD::SRL)
788 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
789 else
790 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
791 }
792
793 assert(NewShiftC < BitWidth && "Invalid shift amount");
794 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
795 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
796 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
797 NewShiftAmt, BitWidthMinus1),
798 0);
799 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
800 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
801 return true;
802}
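
// Hedged worked example (not from the source): for i32,
//   (and (shl x, 2), 0xFFFFFFF0)
// gives LowZBits = 4 and MaskLen = 28, so LowZBits + MaskLen == 32 and
// LowZBits > ShiftAmtC. Reg becomes a UBFM computing x >> 2 (NewShiftC = 2)
// and Shift encodes LSL #4, which reproduces ((x >> 2) << 4).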
803
804/// getExtendTypeForNode - Translate an extend node to the corresponding
805/// ExtendType value.
806static AArch64_AM::ShiftExtendType
807getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
808 if (N.getOpcode() == ISD::SIGN_EXTEND ||
809 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
810 EVT SrcVT;
811 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
812 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
813 else
814 SrcVT = N.getOperand(0).getValueType();
815
816 if (!IsLoadStore && SrcVT == MVT::i8)
817 return AArch64_AM::SXTB;
818 else if (!IsLoadStore && SrcVT == MVT::i16)
819 return AArch64_AM::SXTH;
820 else if (SrcVT == MVT::i32)
821 return AArch64_AM::SXTW;
 822 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
 823
 824 return AArch64_AM::InvalidShiftExtend;
 825 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
826 N.getOpcode() == ISD::ANY_EXTEND) {
827 EVT SrcVT = N.getOperand(0).getValueType();
828 if (!IsLoadStore && SrcVT == MVT::i8)
829 return AArch64_AM::UXTB;
830 else if (!IsLoadStore && SrcVT == MVT::i16)
831 return AArch64_AM::UXTH;
832 else if (SrcVT == MVT::i32)
833 return AArch64_AM::UXTW;
 834 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
 835
 836 return AArch64_AM::InvalidShiftExtend;
 837 } else if (N.getOpcode() == ISD::AND) {
838 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
 839 if (!CSD)
 840 return AArch64_AM::InvalidShiftExtend;
 841 uint64_t AndMask = CSD->getZExtValue();
842
843 switch (AndMask) {
 844 default:
 845 return AArch64_AM::InvalidShiftExtend;
 846 case 0xFF:
847 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
848 case 0xFFFF:
849 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
850 case 0xFFFFFFFF:
851 return AArch64_AM::UXTW;
852 }
853 }
 854
 855 return AArch64_AM::InvalidShiftExtend;
 856}
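
// Illustrative mapping (assumption, not exhaustive):
//   (sign_extend_inreg X, i8)  -> SXTB        (and X, 0xFF)       -> UXTB
//   (sign_extend_inreg X, i16) -> SXTH        (and X, 0xFFFF)     -> UXTH
//   (zero_extend from i32)     -> UXTW        (and X, 0xFFFFFFFF) -> UXTW
// With IsLoadStore set, the byte/halfword forms are not reported, since the
// load/store extended-register addressing modes only take word extends.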
857
858/// Determine whether it is worth folding V into an extended register of an
859/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
860/// instruction, and the shift should be treated as worth folding even if it
861/// has multiple uses.
862bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
863 // Trivial if we are optimizing for code size or if there is only
864 // one use of the value.
865 if (CurDAG->shouldOptForSize() || V.hasOneUse())
866 return true;
867
868 // If a subtarget has a fastpath LSL we can fold a logical shift into
869 // the add/sub and save a cycle.
870 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
 871 V.getConstantOperandVal(1) <= 4 &&
 872 !isa<ConstantSDNode>(V.getOperand(0)))
 873 return true;
874
875 // It hurts otherwise, since the value will be reused.
876 return false;
877}
878
879/// SelectShiftedRegister - Select a "shifted register" operand. If the value
880/// is not shifted, set the Shift operand to default of "LSL 0". The logical
881/// instructions allow the shifted register to be rotated, but the arithmetic
882/// instructions do not. The AllowROR parameter specifies whether ROR is
883/// supported.
884bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
885 SDValue &Reg, SDValue &Shift) {
886 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
887 return true;
 888
 889 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
 890 if (ShType == AArch64_AM::InvalidShiftExtend)
891 return false;
892 if (!AllowROR && ShType == AArch64_AM::ROR)
893 return false;
894
895 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
896 unsigned BitSize = N.getValueSizeInBits();
897 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
898 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
899
900 Reg = N.getOperand(0);
901 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
902 return isWorthFoldingALU(N, true);
903 }
904
905 return false;
906}
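
// Editorial example (assumption): for (or w0, (shl w1, #3)) this returns
// Reg = w1 and Shift = "LSL #3", so the logical instruction can be emitted
// as ORR w0, w0, w1, LSL #3. The arithmetic variant passes AllowROR = false,
// matching the ISA, which only permits ROR on the logical instructions.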
907
908/// Instructions that accept extend modifiers like UXTW expect the register
909/// being extended to be a GPR32, but the incoming DAG might be acting on a
910/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
911/// this is the case.
912static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
 913 if (N.getValueType() == MVT::i32)
914 return N;
915
916 SDLoc dl(N);
917 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
918}
919
920// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
921template<signed Low, signed High, signed Scale>
922bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
923 if (!isa<ConstantSDNode>(N))
924 return false;
925
926 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
927 if ((MulImm % std::abs(Scale)) == 0) {
928 int64_t RDVLImm = MulImm / Scale;
929 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
930 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
931 return true;
932 }
933 }
934
935 return false;
936}
937
938/// SelectArithExtendedRegister - Select a "extended register" operand. This
939/// operand folds in an extend followed by an optional left shift.
940bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
941 SDValue &Shift) {
 942 unsigned ShiftVal = 0;
 943 AArch64_AM::ShiftExtendType Ext;
 944
945 if (N.getOpcode() == ISD::SHL) {
946 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
947 if (!CSD)
948 return false;
949 ShiftVal = CSD->getZExtValue();
950 if (ShiftVal > 4)
951 return false;
952
 953 Ext = getExtendTypeForNode(N.getOperand(0));
 954 if (Ext == AArch64_AM::InvalidShiftExtend)
 955 return false;
 956
956
957 Reg = N.getOperand(0).getOperand(0);
 958 } else {
 959 Ext = getExtendTypeForNode(N);
 960 if (Ext == AArch64_AM::InvalidShiftExtend)
 961 return false;
962
963 Reg = N.getOperand(0);
964
965 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
966 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
967 auto isDef32 = [](SDValue N) {
968 unsigned Opc = N.getOpcode();
969 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
970 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
971 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
972 Opc != ISD::FREEZE;
973 };
974 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
975 isDef32(Reg))
976 return false;
977 }
978
979 // AArch64 mandates that the RHS of the operation must use the smallest
980 // register class that could contain the size being extended from. Thus,
981 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
982 // there might not be an actual 32-bit value in the program. We can
983 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
984 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
985 Reg = narrowIfNeeded(CurDAG, Reg);
986 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
987 MVT::i32);
988 return isWorthFoldingALU(N);
989}
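
// Hedged example (assumption): for
//   (add x0, (shl (sign_extend_inreg w1, i16), #2))
// Ext is SXTH and ShiftVal is 2, the source register is narrowed to a GPR32
// if necessary, and the operand selects so the result can be emitted as
//   add x0, x0, w1, sxth #2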
990
991/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
992/// operand is used by instructions that have an SP operand.
993bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
994 SDValue &Shift) {
 995 unsigned ShiftVal = 0;
 996 AArch64_AM::ShiftExtendType Ext;
 997
998 if (N.getOpcode() != ISD::SHL)
999 return false;
1000
1001 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1002 if (!CSD)
1003 return false;
1004 ShiftVal = CSD->getZExtValue();
1005 if (ShiftVal > 4)
1006 return false;
 1007
 1008 Ext = AArch64_AM::UXTX;
 1009 Reg = N.getOperand(0);
1010 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1011 MVT::i32);
1012 return isWorthFoldingALU(N);
1013}
1014
1015/// If there's a use of this ADDlow that's not itself a load/store then we'll
1016/// need to create a real ADD instruction from it anyway and there's no point in
1017/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1018/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1019/// leads to duplicated ADRP instructions.
1020static bool isWorthFoldingADDlow(SDValue N) {
 1021 for (auto *User : N->users()) {
1022 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1023 User->getOpcode() != ISD::ATOMIC_LOAD &&
1024 User->getOpcode() != ISD::ATOMIC_STORE)
1025 return false;
1026
1027 // ldar and stlr have much more restrictive addressing modes (just a
1028 // register).
1029 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1030 return false;
1031 }
1032
1033 return true;
1034}
1035
1036/// Check if the immediate offset is valid as a scaled immediate.
1037static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1038 unsigned Size) {
1039 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1040 Offset < (Range << Log2_32(Size)))
1041 return true;
1042 return false;
1043}
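
// Quick illustration (assumption): with Size = 8 and Range = 0x1000,
// Offset = 32 is valid (an 8-byte multiple below 8 * 0x1000), while
// Offset = 20 (misaligned) and Offset = -8 (negative) are not and must use
// an unscaled or base-only addressing form instead.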
1044
1045/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1046/// immediate" address. The "Size" argument is the size in bytes of the memory
1047/// reference, which determines the scale.
1048bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1049 unsigned BW, unsigned Size,
1050 SDValue &Base,
1051 SDValue &OffImm) {
1052 SDLoc dl(N);
1053 const DataLayout &DL = CurDAG->getDataLayout();
1054 const TargetLowering *TLI = getTargetLowering();
1055 if (N.getOpcode() == ISD::FrameIndex) {
1056 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1057 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1058 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1059 return true;
1060 }
1061
 1062 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
 1063 // mode selected here doesn't support labels/immediates, only base+offset.
1064 if (CurDAG->isBaseWithConstantOffset(N)) {
1065 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1066 if (IsSignedImm) {
1067 int64_t RHSC = RHS->getSExtValue();
1068 unsigned Scale = Log2_32(Size);
1069 int64_t Range = 0x1LL << (BW - 1);
1070
1071 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1072 RHSC < (Range << Scale)) {
1073 Base = N.getOperand(0);
1074 if (Base.getOpcode() == ISD::FrameIndex) {
1075 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1076 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1077 }
1078 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1079 return true;
1080 }
1081 } else {
1082 // unsigned Immediate
1083 uint64_t RHSC = RHS->getZExtValue();
1084 unsigned Scale = Log2_32(Size);
1085 uint64_t Range = 0x1ULL << BW;
1086
1087 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1088 Base = N.getOperand(0);
1089 if (Base.getOpcode() == ISD::FrameIndex) {
1090 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1091 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1092 }
1093 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1094 return true;
1095 }
1096 }
1097 }
1098 }
1099 // Base only. The address will be materialized into a register before
1100 // the memory is accessed.
1101 // add x0, Xbase, #offset
1102 // stp x1, x2, [x0]
1103 Base = N;
1104 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1105 return true;
1106}
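
// Worked illustration (assumption): for a signed 7-bit, 8-byte-scaled access
// (e.g. an LDP-style form), Range is 1 << 6 = 64, so offsets must be 8-byte
// multiples in [-512, 504]; an offset of 24 yields OffImm = 3, while an
// offset of 1000 falls through to the base-only case at the end.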
1107
1108/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1109/// immediate" address. The "Size" argument is the size in bytes of the memory
1110/// reference, which determines the scale.
1111bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1112 SDValue &Base, SDValue &OffImm) {
1113 SDLoc dl(N);
1114 const DataLayout &DL = CurDAG->getDataLayout();
1115 const TargetLowering *TLI = getTargetLowering();
1116 if (N.getOpcode() == ISD::FrameIndex) {
1117 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1118 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1119 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1120 return true;
1121 }
1122
1123 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1124 GlobalAddressSDNode *GAN =
1125 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1126 Base = N.getOperand(0);
1127 OffImm = N.getOperand(1);
1128 if (!GAN)
1129 return true;
1130
 1131 if (GAN->getOffset() % Size == 0 &&
 1132 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
 1133 return true;
1134 }
1135
1136 if (CurDAG->isBaseWithConstantOffset(N)) {
1137 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1138 int64_t RHSC = (int64_t)RHS->getZExtValue();
1139 unsigned Scale = Log2_32(Size);
1140 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1141 Base = N.getOperand(0);
1142 if (Base.getOpcode() == ISD::FrameIndex) {
1143 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1144 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1145 }
1146 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1147 return true;
1148 }
1149 }
1150 }
1151
1152 // Before falling back to our general case, check if the unscaled
1153 // instructions can handle this. If so, that's preferable.
1154 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1155 return false;
1156
1157 // Base only. The address will be materialized into a register before
1158 // the memory is accessed.
1159 // add x0, Xbase, #offset
1160 // ldr x0, [x0]
1161 Base = N;
1162 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1163 return true;
1164}
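
// Hedged example (assumption): for (add x0, #16) with Size = 8 the constant
// is a valid scaled immediate, so Base = x0 and OffImm = 2, matching
// "ldr x1, [x0, #16]". An offset such as #-8 instead matches
// SelectAddrModeUnscaled, which is why this function returns false in that
// case and lets the unscaled (LDUR-style) pattern win.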
1165
1166/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1167/// immediate" address. This should only match when there is an offset that
1168/// is not valid for a scaled immediate addressing mode. The "Size" argument
1169/// is the size in bytes of the memory reference, which is needed here to know
1170/// what is valid for a scaled immediate.
1171bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1172 SDValue &Base,
1173 SDValue &OffImm) {
1174 if (!CurDAG->isBaseWithConstantOffset(N))
1175 return false;
1176 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1177 int64_t RHSC = RHS->getSExtValue();
1178 if (RHSC >= -256 && RHSC < 256) {
1179 Base = N.getOperand(0);
1180 if (Base.getOpcode() == ISD::FrameIndex) {
1181 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1182 const TargetLowering *TLI = getTargetLowering();
1183 Base = CurDAG->getTargetFrameIndex(
1184 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1185 }
1186 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1187 return true;
1188 }
1189 }
1190 return false;
1191}
1192
1193static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
 1194 SDLoc dl(N);
1195 SDValue ImpDef = SDValue(
1196 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1197 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1198 N);
1199}
1200
1201/// Check if the given SHL node (\p N), can be used to form an
1202/// extended register for an addressing mode.
1203bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1204 bool WantExtend, SDValue &Offset,
1205 SDValue &SignExtend) {
1206 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1207 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1208 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1209 return false;
1210
1211 SDLoc dl(N);
 1212 if (WantExtend) {
 1213 AArch64_AM::ShiftExtendType Ext =
 1214 getExtendTypeForNode(N.getOperand(0), true);
 1215 if (Ext == AArch64_AM::InvalidShiftExtend)
 1216 return false;
 1217
1217
1218 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1219 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1220 MVT::i32);
1221 } else {
1222 Offset = N.getOperand(0);
1223 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1224 }
1225
1226 unsigned LegalShiftVal = Log2_32(Size);
1227 unsigned ShiftVal = CSD->getZExtValue();
1228
1229 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1230 return false;
1231
1232 return isWorthFoldingAddr(N, Size);
1233}
1234
1235bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
 1236 SDValue &Base, SDValue &Offset,
 1237 SDValue &SignExtend,
1238 SDValue &DoShift) {
1239 if (N.getOpcode() != ISD::ADD)
1240 return false;
1241 SDValue LHS = N.getOperand(0);
1242 SDValue RHS = N.getOperand(1);
1243 SDLoc dl(N);
1244
1245 // We don't want to match immediate adds here, because they are better lowered
1246 // to the register-immediate addressing modes.
1247 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1248 return false;
1249
1250 // Check if this particular node is reused in any non-memory related
1251 // operation. If yes, do not try to fold this node into the address
1252 // computation, since the computation will be kept.
1253 const SDNode *Node = N.getNode();
1254 for (SDNode *UI : Node->users()) {
1255 if (!isMemOpOrPrefetch(UI))
1256 return false;
1257 }
1258
1259 // Remember if it is worth folding N when it produces extended register.
1260 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1261
1262 // Try to match a shifted extend on the RHS.
1263 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1264 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1265 Base = LHS;
1266 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1267 return true;
1268 }
1269
1270 // Try to match a shifted extend on the LHS.
1271 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1272 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1273 Base = RHS;
1274 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1275 return true;
1276 }
1277
1278 // There was no shift, whatever else we find.
1279 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
 1280
 1281 AArch64_AM::ShiftExtendType Ext;
 1282 // Try to match an unshifted extend on the LHS.
 1283 if (IsExtendedRegisterWorthFolding &&
 1284 (Ext = getExtendTypeForNode(LHS, true)) !=
 1285 AArch64_AM::InvalidShiftExtend) {
 1286 Base = RHS;
1287 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1288 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1289 MVT::i32);
1290 if (isWorthFoldingAddr(LHS, Size))
1291 return true;
1292 }
1293
1294 // Try to match an unshifted extend on the RHS.
1295 if (IsExtendedRegisterWorthFolding &&
 1296 (Ext = getExtendTypeForNode(RHS, true)) !=
 1297 AArch64_AM::InvalidShiftExtend) {
1298 Base = LHS;
1299 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1300 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1301 MVT::i32);
1302 if (isWorthFoldingAddr(RHS, Size))
1303 return true;
1304 }
1305
1306 return false;
1307}
1308
1309// Check if the given immediate is preferred by ADD. If an immediate can be
1310// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1311// encoded by one MOVZ, return true.
1312static bool isPreferredADD(int64_t ImmOff) {
1313 // Constant in [0x0, 0xfff] can be encoded in ADD.
1314 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1315 return true;
1316 // Check if it can be encoded in an "ADD LSL #12".
1317 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1318 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1319 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1320 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1321 return false;
1322}
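
// Illustrative values (hand-checked, treat as an assumption): 0xfff is
// preferred (plain ADD immediate); 0x123000 is preferred (encodable as
// "ADD ..., LSL #12" but not as a single MOVZ); 0x20000 is not preferred,
// because MOVZ #0x2, LSL #16 already materializes it in one instruction.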
1323
1324bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
 1325 SDValue &Base, SDValue &Offset,
 1326 SDValue &SignExtend,
1327 SDValue &DoShift) {
1328 if (N.getOpcode() != ISD::ADD)
1329 return false;
1330 SDValue LHS = N.getOperand(0);
1331 SDValue RHS = N.getOperand(1);
1332 SDLoc DL(N);
1333
1334 // Check if this particular node is reused in any non-memory related
1335 // operation. If yes, do not try to fold this node into the address
1336 // computation, since the computation will be kept.
1337 const SDNode *Node = N.getNode();
1338 for (SDNode *UI : Node->users()) {
1339 if (!isMemOpOrPrefetch(UI))
1340 return false;
1341 }
1342
 1343 // Watch out if RHS is a wide immediate: it cannot be selected into the
 1344 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
 1345 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1346 // instructions like:
1347 // MOV X0, WideImmediate
1348 // ADD X1, BaseReg, X0
1349 // LDR X2, [X1, 0]
1350 // For such situation, using [BaseReg, XReg] addressing mode can save one
1351 // ADD/SUB:
1352 // MOV X0, WideImmediate
1353 // LDR X2, [BaseReg, X0]
1354 if (isa<ConstantSDNode>(RHS)) {
1355 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
 1356 // Skip if the immediate can be selected by a load/store addressing mode.
 1357 // Also skip if it can be encoded by a single ADD (SUB is also
1358 // checked by using -ImmOff).
1359 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1360 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1361 return false;
1362
1363 SDValue Ops[] = { RHS };
1364 SDNode *MOVI =
1365 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1366 SDValue MOVIV = SDValue(MOVI, 0);
1367 // This ADD of two X register will be selected into [Reg+Reg] mode.
1368 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1369 }
1370
1371 // Remember if it is worth folding N when it produces extended register.
1372 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1373
1374 // Try to match a shifted extend on the RHS.
1375 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1376 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1377 Base = LHS;
1378 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1379 return true;
1380 }
1381
1382 // Try to match a shifted extend on the LHS.
1383 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1384 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1385 Base = RHS;
1386 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1387 return true;
1388 }
1389
1390 // Match any non-shifted, non-extend, non-immediate add expression.
1391 Base = LHS;
1392 Offset = RHS;
1393 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1394 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1395 // Reg1 + Reg2 is free: no check needed.
1396 return true;
1397}
1398
1399SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1400 static const unsigned RegClassIDs[] = {
1401 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1402 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1403 AArch64::dsub2, AArch64::dsub3};
1404
1405 return createTuple(Regs, RegClassIDs, SubRegs);
1406}
1407
1408SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1409 static const unsigned RegClassIDs[] = {
1410 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1411 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1412 AArch64::qsub2, AArch64::qsub3};
1413
1414 return createTuple(Regs, RegClassIDs, SubRegs);
1415}
1416
1417SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1418 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1419 AArch64::ZPR3RegClassID,
1420 AArch64::ZPR4RegClassID};
1421 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1422 AArch64::zsub2, AArch64::zsub3};
1423
1424 return createTuple(Regs, RegClassIDs, SubRegs);
1425}
1426
1427SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1428 assert(Regs.size() == 2 || Regs.size() == 4);
1429
1430 // The createTuple interface requires 3 RegClassIDs for each possible
1431 // tuple type even though we only have them for ZPR2 and ZPR4.
1432 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1433 AArch64::ZPR4Mul4RegClassID};
1434 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1435 AArch64::zsub2, AArch64::zsub3};
1436 return createTuple(Regs, RegClassIDs, SubRegs);
1437}
1438
1439SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1440 const unsigned RegClassIDs[],
1441 const unsigned SubRegs[]) {
1442 // There's no special register-class for a vector-list of 1 element: it's just
1443 // a vector.
1444 if (Regs.size() == 1)
1445 return Regs[0];
1446
1447 assert(Regs.size() >= 2 && Regs.size() <= 4);
1448
1449 SDLoc DL(Regs[0]);
1450
 1451 SmallVector<SDValue, 4> Ops;
 1452
1453 // First operand of REG_SEQUENCE is the desired RegClass.
1454 Ops.push_back(
1455 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1456
1457 // Then we get pairs of source & subregister-position for the components.
1458 for (unsigned i = 0; i < Regs.size(); ++i) {
1459 Ops.push_back(Regs[i]);
1460 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1461 }
1462
1463 SDNode *N =
1464 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1465 return SDValue(N, 0);
1466}
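
// Sketch of the result (assumption): createQTuple({A, B}) for two Q-sized
// values produces
//   REG_SEQUENCE QQRegClassID, A, qsub0, B, qsub1
// as an untyped machine node, which ld2/st2-style instructions then consume
// as a single register-list operand.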
1467
1468void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1469 bool isExt) {
1470 SDLoc dl(N);
1471 EVT VT = N->getValueType(0);
1472
1473 unsigned ExtOff = isExt;
1474
1475 // Form a REG_SEQUENCE to force register allocation.
1476 unsigned Vec0Off = ExtOff + 1;
1477 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1478 N->op_begin() + Vec0Off + NumVecs);
1479 SDValue RegSeq = createQTuple(Regs);
1480
 1481 SmallVector<SDValue, 6> Ops;
 1482 if (isExt)
1483 Ops.push_back(N->getOperand(1));
1484 Ops.push_back(RegSeq);
1485 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1486 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1487}
1488
1489static std::tuple<SDValue, SDValue>
1490extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
 1491 SDLoc DL(Disc);
1492 SDValue AddrDisc;
1493 SDValue ConstDisc;
1494
1495 // If this is a blend, remember the constant and address discriminators.
1496 // Otherwise, it's either a constant discriminator, or a non-blended
1497 // address discriminator.
1498 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1499 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1500 AddrDisc = Disc->getOperand(1);
1501 ConstDisc = Disc->getOperand(2);
1502 } else {
1503 ConstDisc = Disc;
1504 }
1505
1506 // If the constant discriminator (either the blend RHS, or the entire
1507 // discriminator value) isn't a 16-bit constant, bail out, and let the
1508 // discriminator be computed separately.
1509 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1510 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1511 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1512
1513 // If there's no address discriminator, use XZR directly.
1514 if (!AddrDisc)
1515 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1516
1517 return std::make_tuple(
1518 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1519 AddrDisc);
1520}
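
// Behaviour sketch (assumption, using hypothetical operands):
//   blend(AddrDisc, 1234)     -> (constant 1234, AddrDisc)
//   constant 42               -> (constant 42, XZR)
//   any other discriminator D -> (constant 0, D), computed separately later.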
1521
1522void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1523 SDLoc DL(N);
1524 // IntrinsicID is operand #0
1525 SDValue Val = N->getOperand(1);
1526 SDValue AUTKey = N->getOperand(2);
1527 SDValue AUTDisc = N->getOperand(3);
1528
1529 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1530 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1531
1532 SDValue AUTAddrDisc, AUTConstDisc;
1533 std::tie(AUTConstDisc, AUTAddrDisc) =
1534 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1535
1536 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1537 AArch64::X16, Val, SDValue());
1538 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1539
1540 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1541 ReplaceNode(N, AUT);
1542}
1543
1544void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1545 SDLoc DL(N);
1546 // IntrinsicID is operand #0
1547 SDValue Val = N->getOperand(1);
1548 SDValue AUTKey = N->getOperand(2);
1549 SDValue AUTDisc = N->getOperand(3);
1550 SDValue PACKey = N->getOperand(4);
1551 SDValue PACDisc = N->getOperand(5);
1552
1553 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1554 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1555
1556 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1557 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1558
1559 SDValue AUTAddrDisc, AUTConstDisc;
1560 std::tie(AUTConstDisc, AUTAddrDisc) =
1561 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1562
1563 SDValue PACAddrDisc, PACConstDisc;
1564 std::tie(PACConstDisc, PACAddrDisc) =
1565 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1566
1567 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1568 AArch64::X16, Val, SDValue());
1569
1570 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1571 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1572
1573 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1574 ReplaceNode(N, AUTPAC);
1575}
1576
1577bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1578 LoadSDNode *LD = cast<LoadSDNode>(N);
1579 if (LD->isUnindexed())
1580 return false;
1581 EVT VT = LD->getMemoryVT();
1582 EVT DstVT = N->getValueType(0);
1583 ISD::MemIndexedMode AM = LD->getAddressingMode();
1584 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1585
1586 // We're not doing validity checking here. That was done when checking
1587 // if we should mark the load as indexed or not. We're just selecting
1588 // the right instruction.
1589 unsigned Opcode = 0;
1590
1591 ISD::LoadExtType ExtType = LD->getExtensionType();
1592 bool InsertTo64 = false;
1593 if (VT == MVT::i64)
1594 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1595 else if (VT == MVT::i32) {
1596 if (ExtType == ISD::NON_EXTLOAD)
1597 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1598 else if (ExtType == ISD::SEXTLOAD)
1599 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1600 else {
1601 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1602 InsertTo64 = true;
1603 // The result of the load is only i32. It's the subreg_to_reg that makes
1604 // it into an i64.
1605 DstVT = MVT::i32;
1606 }
1607 } else if (VT == MVT::i16) {
1608 if (ExtType == ISD::SEXTLOAD) {
1609 if (DstVT == MVT::i64)
1610 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1611 else
1612 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1613 } else {
1614 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1615 InsertTo64 = DstVT == MVT::i64;
1616 // The result of the load is only i32. It's the subreg_to_reg that makes
1617 // it into an i64.
1618 DstVT = MVT::i32;
1619 }
1620 } else if (VT == MVT::i8) {
1621 if (ExtType == ISD::SEXTLOAD) {
1622 if (DstVT == MVT::i64)
1623 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1624 else
1625 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1626 } else {
1627 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1628 InsertTo64 = DstVT == MVT::i64;
1629 // The result of the load is only i32. It's the subreg_to_reg that makes
1630 // it into an i64.
1631 DstVT = MVT::i32;
1632 }
1633 } else if (VT == MVT::f16) {
1634 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1635 } else if (VT == MVT::bf16) {
1636 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1637 } else if (VT == MVT::f32) {
1638 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1639 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1640 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1641 } else if (VT.is128BitVector()) {
1642 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1643 } else
1644 return false;
1645 SDValue Chain = LD->getChain();
1646 SDValue Base = LD->getBasePtr();
1647 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1648 int OffsetVal = (int)OffsetOp->getZExtValue();
1649 SDLoc dl(N);
1650 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1651 SDValue Ops[] = { Base, Offset, Chain };
1652 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1653 MVT::Other, Ops);
1654
1655 // Transfer memoperands.
1656 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1657 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1658
1659 // Either way, we're replacing the node, so tell the caller that.
1660 SDValue LoadedVal = SDValue(Res, 1);
1661 if (InsertTo64) {
1662 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1663 LoadedVal =
1664 SDValue(CurDAG->getMachineNode(
1665 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1666 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1667 SubReg),
1668 0);
1669 }
1670
1671 ReplaceUses(SDValue(N, 0), LoadedVal);
1672 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1673 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1674 CurDAG->RemoveDeadNode(N);
1675 return true;
1676}
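// Editorial note (not part of the LLVM source): the pre/post-indexed opcodes
// chosen above correspond to the two write-back addressing forms, e.g.
//   ldr x0, [x1, #16]!   ; pre-index:  address is x1 + 16, x1 is updated
//   ldr x0, [x1], #16    ; post-index: address is x1, then x1 += 16
// Result 0 of the selected machine node is the updated base register, result 1
// the loaded value, and result 2 the chain, which is why the uses of N are
// remapped in that order above.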
1677
1678void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1679 unsigned SubRegIdx) {
1680 SDLoc dl(N);
1681 EVT VT = N->getValueType(0);
1682 SDValue Chain = N->getOperand(0);
1683
1684 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1685 Chain};
1686
1687 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1688
1689 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1690 SDValue SuperReg = SDValue(Ld, 0);
1691 for (unsigned i = 0; i < NumVecs; ++i)
1692 ReplaceUses(SDValue(N, i),
1693 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1694
1695 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1696
1697 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1698 // because it's too simple to have needed special treatment during lowering.
1699 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1700 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1701 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1702 }
1703
1704 CurDAG->RemoveDeadNode(N);
1705}
1706
1707void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1708 unsigned Opc, unsigned SubRegIdx) {
1709 SDLoc dl(N);
1710 EVT VT = N->getValueType(0);
1711 SDValue Chain = N->getOperand(0);
1712
1713 SDValue Ops[] = {N->getOperand(1), // Mem operand
1714 N->getOperand(2), // Incremental
1715 Chain};
1716
1717 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1718 MVT::Untyped, MVT::Other};
1719
1720 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1721
1722 // Update uses of write back register
1723 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1724
1725 // Update uses of vector list
1726 SDValue SuperReg = SDValue(Ld, 1);
1727 if (NumVecs == 1)
1728 ReplaceUses(SDValue(N, 0), SuperReg);
1729 else
1730 for (unsigned i = 0; i < NumVecs; ++i)
1731 ReplaceUses(SDValue(N, i),
1732 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1733
1734 // Update the chain
1735 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1736 CurDAG->RemoveDeadNode(N);
1737}
1738
1739/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1740/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1741/// new Base and an SDValue representing the new offset.
1742std::tuple<unsigned, SDValue, SDValue>
1743AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1744 unsigned Opc_ri,
1745 const SDValue &OldBase,
1746 const SDValue &OldOffset,
1747 unsigned Scale) {
1748 SDValue NewBase = OldBase;
1749 SDValue NewOffset = OldOffset;
1750 // Detect a possible Reg+Imm addressing mode.
1751 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1752 N, OldBase, NewBase, NewOffset);
1753
1754 // Detect a possible reg+reg addressing mode, but only if we haven't already
1755 // detected a Reg+Imm one.
1756 const bool IsRegReg =
1757 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1758
1759 // Select the instruction.
1760 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1761}
1762
1763enum class SelectTypeKind {
1764 Int1 = 0,
1765 Int = 1,
1766 FP = 2,
1767 AnyType = 3,
1768};
1769
1770/// This function selects an opcode from a list of opcodes, which is
1771/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1772/// element types, in this order.
1773template <SelectTypeKind Kind>
1774static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1775 // Only match scalable vector VTs
1776 if (!VT.isScalableVector())
1777 return 0;
1778
1779 EVT EltVT = VT.getVectorElementType();
1780 unsigned Key = VT.getVectorMinNumElements();
1781 switch (Kind) {
1782 case SelectTypeKind::AnyType:
1783 break;
1784 case SelectTypeKind::Int:
1785 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1786 EltVT != MVT::i64)
1787 return 0;
1788 break;
1789 case SelectTypeKind::Int1:
1790 if (EltVT != MVT::i1)
1791 return 0;
1792 break;
1793 case SelectTypeKind::FP:
1794 if (EltVT == MVT::bf16)
1795 Key = 16;
1796 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1797 EltVT != MVT::f64)
1798 return 0;
1799 break;
1800 }
1801
1802 unsigned Offset;
1803 switch (Key) {
1804 case 16: // 8-bit or bf16
1805 Offset = 0;
1806 break;
1807 case 8: // 16-bit
1808 Offset = 1;
1809 break;
1810 case 4: // 32-bit
1811 Offset = 2;
1812 break;
1813 case 2: // 64-bit
1814 Offset = 3;
1815 break;
1816 default:
1817 return 0;
1818 }
1819
1820 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1821}
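// Editorial example (not part of the LLVM source): with the indexing above,
// nxv16i8 has 16 min-elements and selects Opcodes[0], nxv8i16/nxv8f16 select
// Opcodes[1], nxv4i32/nxv4f32 select Opcodes[2], and nxv2i64/nxv2f64 select
// Opcodes[3]. For Kind == FP, an nxv8bf16 type forces Key = 16 and therefore
// also selects Opcodes[0], which is why case 16 is labelled "8-bit or bf16".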
1822
1823// This function is almost identical to SelectWhilePair, but has an
1824// extra check on the range of the immediate operand.
1825// TODO: Merge these two functions together at some point?
1826void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1827 // Immediate can be either 0 or 1.
1828 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1829 if (Imm->getZExtValue() > 1)
1830 return;
1831
1832 SDLoc DL(N);
1833 EVT VT = N->getValueType(0);
1834 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1835 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1836 SDValue SuperReg = SDValue(WhilePair, 0);
1837
1838 for (unsigned I = 0; I < 2; ++I)
1839 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1840 AArch64::psub0 + I, DL, VT, SuperReg));
1841
1842 CurDAG->RemoveDeadNode(N);
1843}
1844
1845void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1846 SDLoc DL(N);
1847 EVT VT = N->getValueType(0);
1848
1849 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1850
1851 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1852 SDValue SuperReg = SDValue(WhilePair, 0);
1853
1854 for (unsigned I = 0; I < 2; ++I)
1855 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1856 AArch64::psub0 + I, DL, VT, SuperReg));
1857
1858 CurDAG->RemoveDeadNode(N);
1859}
1860
1861void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1862 unsigned Opcode) {
1863 EVT VT = N->getValueType(0);
1864 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1865 SDValue Ops = createZTuple(Regs);
1866 SDLoc DL(N);
1867 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1868 SDValue SuperReg = SDValue(Intrinsic, 0);
1869 for (unsigned i = 0; i < NumVecs; ++i)
1870 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1871 AArch64::zsub0 + i, DL, VT, SuperReg));
1872
1873 CurDAG->RemoveDeadNode(N);
1874}
1875
1876void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1877 unsigned Opcode) {
1878 SDLoc DL(N);
1879 EVT VT = N->getValueType(0);
1880 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1881 Ops.push_back(/*Chain*/ N->getOperand(0));
1882
1883 SDNode *Instruction =
1884 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1885 SDValue SuperReg = SDValue(Instruction, 0);
1886
1887 for (unsigned i = 0; i < NumVecs; ++i)
1888 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1889 AArch64::zsub0 + i, DL, VT, SuperReg));
1890
1891 // Copy chain
1892 unsigned ChainIdx = NumVecs;
1893 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1894 CurDAG->RemoveDeadNode(N);
1895}
1896
1897void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1898 unsigned NumVecs,
1899 bool IsZmMulti,
1900 unsigned Opcode,
1901 bool HasPred) {
1902 assert(Opcode != 0 && "Unexpected opcode");
1903
1904 SDLoc DL(N);
1905 EVT VT = N->getValueType(0);
1906 unsigned FirstVecIdx = HasPred ? 2 : 1;
1907
1908 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1909 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1910 return createZMulTuple(Regs);
1911 };
1912
1913 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1914
1915 SDValue Zm;
1916 if (IsZmMulti)
1917 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1918 else
1919 Zm = N->getOperand(NumVecs + FirstVecIdx);
1920
1921 SDNode *Intrinsic;
1922 if (HasPred)
1923 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1924 N->getOperand(1), Zdn, Zm);
1925 else
1926 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1927 SDValue SuperReg = SDValue(Intrinsic, 0);
1928 for (unsigned i = 0; i < NumVecs; ++i)
1929 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1930 AArch64::zsub0 + i, DL, VT, SuperReg));
1931
1932 CurDAG->RemoveDeadNode(N);
1933}
1934
1935void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1936 unsigned Scale, unsigned Opc_ri,
1937 unsigned Opc_rr, bool IsIntr) {
1938 assert(Scale < 5 && "Invalid scaling value.");
1939 SDLoc DL(N);
1940 EVT VT = N->getValueType(0);
1941 SDValue Chain = N->getOperand(0);
1942
1943 // Optimize addressing mode.
1944 SDValue Base, Offset;
1945 unsigned Opc;
1946 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1947 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1948 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1949
1950 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1951 Base, // Memory operand
1952 Offset, Chain};
1953
1954 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1955
1956 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1957 SDValue SuperReg = SDValue(Load, 0);
1958 for (unsigned i = 0; i < NumVecs; ++i)
1959 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1960 AArch64::zsub0 + i, DL, VT, SuperReg));
1961
1962 // Copy chain
1963 unsigned ChainIdx = NumVecs;
1964 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1965 CurDAG->RemoveDeadNode(N);
1966}
1967
1968void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1969 unsigned NumVecs,
1970 unsigned Scale,
1971 unsigned Opc_ri,
1972 unsigned Opc_rr) {
1973 assert(Scale < 4 && "Invalid scaling value.");
1974 SDLoc DL(N);
1975 EVT VT = N->getValueType(0);
1976 SDValue Chain = N->getOperand(0);
1977
1978 SDValue PNg = N->getOperand(2);
1979 SDValue Base = N->getOperand(3);
1980 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1981 unsigned Opc;
1982 std::tie(Opc, Base, Offset) =
1983 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1984
1985 SDValue Ops[] = {PNg, // Predicate-as-counter
1986 Base, // Memory operand
1987 Offset, Chain};
1988
1989 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1990
1991 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1992 SDValue SuperReg = SDValue(Load, 0);
1993 for (unsigned i = 0; i < NumVecs; ++i)
1994 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1995 AArch64::zsub0 + i, DL, VT, SuperReg));
1996
1997 // Copy chain
1998 unsigned ChainIdx = NumVecs;
1999 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2000 CurDAG->RemoveDeadNode(N);
2001}
2002
2003void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2004 unsigned Opcode) {
2005 if (N->getValueType(0) != MVT::nxv4f32)
2006 return;
2007 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2008}
2009
2010void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2011 unsigned NumOutVecs,
2012 unsigned Opc,
2013 uint32_t MaxImm) {
2014 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2015 if (Imm->getZExtValue() > MaxImm)
2016 return;
2017
2018 SDValue ZtValue;
2019 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2020 return;
2021
2022 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2023 SDLoc DL(Node);
2024 EVT VT = Node->getValueType(0);
2025
2026 SDNode *Instruction =
2027 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2028 SDValue SuperReg = SDValue(Instruction, 0);
2029
2030 for (unsigned I = 0; I < NumOutVecs; ++I)
2031 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2032 AArch64::zsub0 + I, DL, VT, SuperReg));
2033
2034 // Copy chain
2035 unsigned ChainIdx = NumOutVecs;
2036 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2037 CurDAG->RemoveDeadNode(Node);
2038}
2039
2040void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2041 unsigned NumOutVecs,
2042 unsigned Opc) {
2043
2044 SDValue ZtValue;
2045 SmallVector<SDValue, 4> Ops;
2046 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2047 return;
2048
2049 Ops.push_back(ZtValue);
2050 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2051 SDLoc DL(Node);
2052 EVT VT = Node->getValueType(0);
2053
2054 SDNode *Instruction =
2055 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2056 SDValue SuperReg = SDValue(Instruction, 0);
2057
2058 for (unsigned I = 0; I < NumOutVecs; ++I)
2059 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2060 AArch64::zsub0 + I, DL, VT, SuperReg));
2061
2062 // Copy chain
2063 unsigned ChainIdx = NumOutVecs;
2064 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2065 CurDAG->RemoveDeadNode(Node);
2066}
2067
2068void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2069 unsigned Op) {
2070 SDLoc DL(N);
2071 EVT VT = N->getValueType(0);
2072
2073 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2074 SDValue Zd = createZMulTuple(Regs);
2075 SDValue Zn = N->getOperand(1 + NumVecs);
2076 SDValue Zm = N->getOperand(2 + NumVecs);
2077
2078 SDValue Ops[] = {Zd, Zn, Zm};
2079
2080 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2081 SDValue SuperReg = SDValue(Intrinsic, 0);
2082 for (unsigned i = 0; i < NumVecs; ++i)
2083 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2084 AArch64::zsub0 + i, DL, VT, SuperReg));
2085
2086 CurDAG->RemoveDeadNode(N);
2087}
2088
2089bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2090 switch (BaseReg) {
2091 default:
2092 return false;
2093 case AArch64::ZA:
2094 case AArch64::ZAB0:
2095 if (TileNum == 0)
2096 break;
2097 return false;
2098 case AArch64::ZAH0:
2099 if (TileNum <= 1)
2100 break;
2101 return false;
2102 case AArch64::ZAS0:
2103 if (TileNum <= 3)
2104 break;
2105 return false;
2106 case AArch64::ZAD0:
2107 if (TileNum <= 7)
2108 break;
2109 return false;
2110 }
2111
2112 BaseReg += TileNum;
2113 return true;
2114}
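// Editorial example (not part of the LLVM source): with BaseReg == AArch64::ZAS0
// and TileNum == 2, the checks above accept the tile (2 <= 3) and the function
// returns ZAS2. This relies on ZAS0..ZAS3 (and likewise the ZAB/ZAH/ZAD tiles)
// being numbered consecutively in the generated register enum, which is what
// the "BaseReg += TileNum" arithmetic assumes.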
2115
2116template <unsigned MaxIdx, unsigned Scale>
2117void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2118 unsigned BaseReg, unsigned Op) {
2119 unsigned TileNum = 0;
2120 if (BaseReg != AArch64::ZA)
2121 TileNum = N->getConstantOperandVal(2);
2122
2123 if (!SelectSMETile(BaseReg, TileNum))
2124 return;
2125
2126 SDValue SliceBase, Base, Offset;
2127 if (BaseReg == AArch64::ZA)
2128 SliceBase = N->getOperand(2);
2129 else
2130 SliceBase = N->getOperand(3);
2131
2132 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2133 return;
2134
2135 SDLoc DL(N);
2136 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2137 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2138 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2139
2140 EVT VT = N->getValueType(0);
2141 for (unsigned I = 0; I < NumVecs; ++I)
2142 ReplaceUses(SDValue(N, I),
2143 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2144 SDValue(Mov, 0)));
2145 // Copy chain
2146 unsigned ChainIdx = NumVecs;
2147 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2148 CurDAG->RemoveDeadNode(N);
2149}
2150
2151void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2152 unsigned Op, unsigned MaxIdx,
2153 unsigned Scale, unsigned BaseReg) {
2154 // The slice operand can be in different positions:
2155 // Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2156 // Tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2157 SDValue SliceBase = N->getOperand(2);
2158 if (BaseReg != AArch64::ZA)
2159 SliceBase = N->getOperand(3);
2160
2161 SDValue Base, Offset;
2162 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2163 return;
2164 // The correct ZA tile number is computed when the machine instruction is
2165 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2166 // register with ZReg.
2167 SDLoc DL(N);
2168 SmallVector<SDValue, 4> Ops;
2169 if (BaseReg != AArch64::ZA)
2170 Ops.push_back(N->getOperand(2));
2171 Ops.push_back(Base);
2172 Ops.push_back(Offset);
2173 Ops.push_back(N->getOperand(0)); //Chain
2174 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2175
2176 EVT VT = N->getValueType(0);
2177 for (unsigned I = 0; I < NumVecs; ++I)
2178 ReplaceUses(SDValue(N, I),
2179 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2180 SDValue(Mov, 0)));
2181
2182 // Copy chain
2183 unsigned ChainIdx = NumVecs;
2184 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2185 CurDAG->RemoveDeadNode(N);
2186}
2187
2188void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2189 unsigned NumOutVecs,
2190 bool IsTupleInput,
2191 unsigned Opc) {
2192 SDLoc DL(N);
2193 EVT VT = N->getValueType(0);
2194 unsigned NumInVecs = N->getNumOperands() - 1;
2195
2197 if (IsTupleInput) {
2198 assert((NumInVecs == 2 || NumInVecs == 4) &&
2199 "Don't know how to handle multi-register input!");
2200 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2201 Ops.push_back(createZMulTuple(Regs));
2202 } else {
2203 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2204 for (unsigned I = 0; I < NumInVecs; I++)
2205 Ops.push_back(N->getOperand(1 + I));
2206 }
2207
2208 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2209 SDValue SuperReg = SDValue(Res, 0);
2210
2211 for (unsigned I = 0; I < NumOutVecs; I++)
2212 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2213 AArch64::zsub0 + I, DL, VT, SuperReg));
2214 CurDAG->RemoveDeadNode(N);
2215}
2216
2217void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2218 unsigned Opc) {
2219 SDLoc dl(N);
2220 EVT VT = N->getOperand(2)->getValueType(0);
2221
2222 // Form a REG_SEQUENCE to force register allocation.
2223 bool Is128Bit = VT.getSizeInBits() == 128;
2224 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2225 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2226
2227 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2228 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2229
2230 // Transfer memoperands.
2231 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2232 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2233
2234 ReplaceNode(N, St);
2235}
2236
2237void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2238 unsigned Scale, unsigned Opc_rr,
2239 unsigned Opc_ri) {
2240 SDLoc dl(N);
2241
2242 // Form a REG_SEQUENCE to force register allocation.
2243 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2244 SDValue RegSeq = createZTuple(Regs);
2245
2246 // Optimize addressing mode.
2247 unsigned Opc;
2248 SDValue Base, Offset;
2249 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2250 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2251 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2252
2253 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2254 Base, // address
2255 Offset, // offset
2256 N->getOperand(0)}; // chain
2257 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2258
2259 ReplaceNode(N, St);
2260}
2261
2262bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2263 SDValue &OffImm) {
2264 SDLoc dl(N);
2265 const DataLayout &DL = CurDAG->getDataLayout();
2266 const TargetLowering *TLI = getTargetLowering();
2267
2268 // Try to match it for the frame address
2269 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2270 int FI = FINode->getIndex();
2271 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2272 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2273 return true;
2274 }
2275
2276 return false;
2277}
2278
2279void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2280 unsigned Opc) {
2281 SDLoc dl(N);
2282 EVT VT = N->getOperand(2)->getValueType(0);
2283 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2284 MVT::Other}; // Type for the Chain
2285
2286 // Form a REG_SEQUENCE to force register allocation.
2287 bool Is128Bit = VT.getSizeInBits() == 128;
2288 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2289 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2290
2291 SDValue Ops[] = {RegSeq,
2292 N->getOperand(NumVecs + 1), // base register
2293 N->getOperand(NumVecs + 2), // Incremental
2294 N->getOperand(0)}; // Chain
2295 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2296
2297 ReplaceNode(N, St);
2298}
2299
2300namespace {
2301/// WidenVector - Given a value in the V64 register class, produce the
2302/// equivalent value in the V128 register class.
2303class WidenVector {
2304 SelectionDAG &DAG;
2305
2306public:
2307 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2308
2309 SDValue operator()(SDValue V64Reg) {
2310 EVT VT = V64Reg.getValueType();
2311 unsigned NarrowSize = VT.getVectorNumElements();
2312 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2313 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2314 SDLoc DL(V64Reg);
2315
2316 SDValue Undef =
2317 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2318 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2319 }
2320};
2321} // namespace
2322
2323/// NarrowVector - Given a value in the V128 register class, produce the
2324/// equivalent value in the V64 register class.
2325 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2326 EVT VT = V128Reg.getValueType();
2327 unsigned WideSize = VT.getVectorNumElements();
2328 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2329 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2330
2331 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2332 V128Reg);
2333}
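// Editorial note (not part of the LLVM source): WidenVector places a 64-bit
// vector value (e.g. v2f32) into the dsub subregister of an otherwise undefined
// 128-bit register (v4f32), and NarrowVector extracts that dsub half again; the
// pair lets the lane load/store selection below operate uniformly on Q
// register tuples.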
2334
2335void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2336 unsigned Opc) {
2337 SDLoc dl(N);
2338 EVT VT = N->getValueType(0);
2339 bool Narrow = VT.getSizeInBits() == 64;
2340
2341 // Form a REG_SEQUENCE to force register allocation.
2342 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2343
2344 if (Narrow)
2345 transform(Regs, Regs.begin(),
2346 WidenVector(*CurDAG));
2347
2348 SDValue RegSeq = createQTuple(Regs);
2349
2350 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2351
2352 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2353
2354 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2355 N->getOperand(NumVecs + 3), N->getOperand(0)};
2356 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2357 SDValue SuperReg = SDValue(Ld, 0);
2358
2359 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2360 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2361 AArch64::qsub2, AArch64::qsub3 };
2362 for (unsigned i = 0; i < NumVecs; ++i) {
2363 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2364 if (Narrow)
2365 NV = NarrowVector(NV, *CurDAG);
2366 ReplaceUses(SDValue(N, i), NV);
2367 }
2368
2369 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2370 CurDAG->RemoveDeadNode(N);
2371}
2372
2373void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2374 unsigned Opc) {
2375 SDLoc dl(N);
2376 EVT VT = N->getValueType(0);
2377 bool Narrow = VT.getSizeInBits() == 64;
2378
2379 // Form a REG_SEQUENCE to force register allocation.
2380 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2381
2382 if (Narrow)
2383 transform(Regs, Regs.begin(),
2384 WidenVector(*CurDAG));
2385
2386 SDValue RegSeq = createQTuple(Regs);
2387
2388 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2389 RegSeq->getValueType(0), MVT::Other};
2390
2391 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2392
2393 SDValue Ops[] = {RegSeq,
2394 CurDAG->getTargetConstant(LaneNo, dl,
2395 MVT::i64), // Lane Number
2396 N->getOperand(NumVecs + 2), // Base register
2397 N->getOperand(NumVecs + 3), // Incremental
2398 N->getOperand(0)};
2399 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2400
2401 // Update uses of the write back register
2402 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2403
2404 // Update uses of the vector list
2405 SDValue SuperReg = SDValue(Ld, 1);
2406 if (NumVecs == 1) {
2407 ReplaceUses(SDValue(N, 0),
2408 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2409 } else {
2410 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2411 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2412 AArch64::qsub2, AArch64::qsub3 };
2413 for (unsigned i = 0; i < NumVecs; ++i) {
2414 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2415 SuperReg);
2416 if (Narrow)
2417 NV = NarrowVector(NV, *CurDAG);
2418 ReplaceUses(SDValue(N, i), NV);
2419 }
2420 }
2421
2422 // Update the Chain
2423 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2424 CurDAG->RemoveDeadNode(N);
2425}
2426
2427void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2428 unsigned Opc) {
2429 SDLoc dl(N);
2430 EVT VT = N->getOperand(2)->getValueType(0);
2431 bool Narrow = VT.getSizeInBits() == 64;
2432
2433 // Form a REG_SEQUENCE to force register allocation.
2434 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2435
2436 if (Narrow)
2437 transform(Regs, Regs.begin(),
2438 WidenVector(*CurDAG));
2439
2440 SDValue RegSeq = createQTuple(Regs);
2441
2442 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2443
2444 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2445 N->getOperand(NumVecs + 3), N->getOperand(0)};
2446 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2447
2448 // Transfer memoperands.
2449 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2450 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2451
2452 ReplaceNode(N, St);
2453}
2454
2455void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2456 unsigned Opc) {
2457 SDLoc dl(N);
2458 EVT VT = N->getOperand(2)->getValueType(0);
2459 bool Narrow = VT.getSizeInBits() == 64;
2460
2461 // Form a REG_SEQUENCE to force register allocation.
2462 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2463
2464 if (Narrow)
2465 transform(Regs, Regs.begin(),
2466 WidenVector(*CurDAG));
2467
2468 SDValue RegSeq = createQTuple(Regs);
2469
2470 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2471 MVT::Other};
2472
2473 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2474
2475 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2476 N->getOperand(NumVecs + 2), // Base Register
2477 N->getOperand(NumVecs + 3), // Incremental
2478 N->getOperand(0)};
2479 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2480
2481 // Transfer memoperands.
2482 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2483 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2484
2485 ReplaceNode(N, St);
2486}
2487
2488 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2489 unsigned &Opc, SDValue &Opd0,
2490 unsigned &LSB, unsigned &MSB,
2491 unsigned NumberOfIgnoredLowBits,
2492 bool BiggerPattern) {
2493 assert(N->getOpcode() == ISD::AND &&
2494 "N must be a AND operation to call this function");
2495
2496 EVT VT = N->getValueType(0);
2497
2498 // Here we can test the type of VT and return false when the type does not
2499 // match, but since it is done prior to that call in the current context
2500 // we turned that into an assert to avoid redundant code.
2501 assert((VT == MVT::i32 || VT == MVT::i64) &&
2502 "Type checking must have been done before calling this function");
2503
2504 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2505 // changed the AND node to a 32-bit mask operation. We'll have to
2506 // undo that as part of the transform here if we want to catch all
2507 // the opportunities.
2508 // Currently the NumberOfIgnoredLowBits argument helps to recover
2509 // from these situations when matching bigger pattern (bitfield insert).
2510
2511 // For unsigned extracts, check for a shift right and mask
2512 uint64_t AndImm = 0;
2513 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2514 return false;
2515
2516 const SDNode *Op0 = N->getOperand(0).getNode();
2517
2518 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2519 // simplified. Try to undo that
2520 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2521
2522 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2523 if (AndImm & (AndImm + 1))
2524 return false;
2525
2526 bool ClampMSB = false;
2527 uint64_t SrlImm = 0;
2528 // Handle the SRL + ANY_EXTEND case.
2529 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2530 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2531 // Extend the incoming operand of the SRL to 64-bit.
2532 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2533 // Make sure to clamp the MSB so that we preserve the semantics of the
2534 // original operations.
2535 ClampMSB = true;
2536 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2537 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2538 SrlImm)) {
2539 // If the shift result was truncated, we can still combine them.
2540 Opd0 = Op0->getOperand(0).getOperand(0);
2541
2542 // Use the type of SRL node.
2543 VT = Opd0->getValueType(0);
2544 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2545 Opd0 = Op0->getOperand(0);
2546 ClampMSB = (VT == MVT::i32);
2547 } else if (BiggerPattern) {
2548 // Let's pretend a 0 shift right has been performed.
2549 // The resulting code will be at least as good as the original one
2550 // plus it may expose more opportunities for bitfield insert pattern.
2551 // FIXME: Currently we limit this to the bigger pattern, because
2552 // some optimizations expect AND and not UBFM.
2553 Opd0 = N->getOperand(0);
2554 } else
2555 return false;
2556
2557 // Bail out on large immediates. This happens when no proper
2558 // combining/constant folding was performed.
2559 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2560 LLVM_DEBUG(
2561 (dbgs() << N
2562 << ": Found large shift immediate, this should not happen\n"));
2563 return false;
2564 }
2565
2566 LSB = SrlImm;
2567 MSB = SrlImm +
2568 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2569 : llvm::countr_one<uint64_t>(AndImm)) -
2570 1;
2571 if (ClampMSB)
2572 // Since we're moving the extend before the right shift operation, we need
2573 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2574 // the zeros which would get shifted in with the original right shift
2575 // operation.
2576 MSB = MSB > 31 ? 31 : MSB;
2577
2578 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2579 return true;
2580}
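// Editorial example (not part of the LLVM source): for i32 N = (and (srl x, 4),
// 0xff), the code above finds AndImm = 0xff (a mask of the low bits) and
// SrlImm = 4, so LSB = 4 and MSB = 4 + countr_one(0xff) - 1 = 11, selecting
// UBFMWri x, #4, #11, i.e. UBFX w0, w1, #4, #8.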
2581
2582static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2583 SDValue &Opd0, unsigned &Immr,
2584 unsigned &Imms) {
2585 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2586
2587 EVT VT = N->getValueType(0);
2588 unsigned BitWidth = VT.getSizeInBits();
2589 assert((VT == MVT::i32 || VT == MVT::i64) &&
2590 "Type checking must have been done before calling this function");
2591
2592 SDValue Op = N->getOperand(0);
2593 if (Op->getOpcode() == ISD::TRUNCATE) {
2594 Op = Op->getOperand(0);
2595 VT = Op->getValueType(0);
2596 BitWidth = VT.getSizeInBits();
2597 }
2598
2599 uint64_t ShiftImm;
2600 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2601 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2602 return false;
2603
2604 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2605 if (ShiftImm + Width > BitWidth)
2606 return false;
2607
2608 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2609 Opd0 = Op.getOperand(0);
2610 Immr = ShiftImm;
2611 Imms = ShiftImm + Width - 1;
2612 return true;
2613}
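// Editorial example (not part of the LLVM source): for i32
// N = sign_extend_inreg (srl x, 3), i8, the width is 8 and ShiftImm is 3, so
// Immr = 3 and Imms = 3 + 8 - 1 = 10, selecting SBFMWri x, #3, #10,
// i.e. SBFX w0, w1, #3, #8.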
2614
2615static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2616 SDValue &Opd0, unsigned &LSB,
2617 unsigned &MSB) {
2618 // We are looking for the following pattern which basically extracts several
2619 // contiguous bits from the source value and places them starting at the LSB
2620 // of the destination value; all other bits of the destination value are set to zero:
2621 //
2622 // Value2 = AND Value, MaskImm
2623 // SRL Value2, ShiftImm
2624 //
2625 // where MaskImm >> ShiftImm determines the width of the extracted bit range.
2626 //
2627 // This gets selected into a single UBFM:
2628 //
2629 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2630 //
2631
2632 if (N->getOpcode() != ISD::SRL)
2633 return false;
2634
2635 uint64_t AndMask = 0;
2636 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2637 return false;
2638
2639 Opd0 = N->getOperand(0).getOperand(0);
2640
2641 uint64_t SrlImm = 0;
2642 if (!isIntImmediate(N->getOperand(1), SrlImm))
2643 return false;
2644
2645 // Check whether we really have several bits extract here.
2646 if (!isMask_64(AndMask >> SrlImm))
2647 return false;
2648
2649 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2650 LSB = SrlImm;
2651 MSB = llvm::Log2_64(AndMask);
2652 return true;
2653}
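// Editorial example (not part of the LLVM source): for
// N = (srl (and x, 0x0ff0), 4), AndMask >> SrlImm = 0xff is a mask, so
// LSB = 4 and MSB = Log2_64(0x0ff0) = 11, and the pattern becomes
// UBFM x, #4, #11 (UBFX #4, #8), matching the comment above.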
2654
2655static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2656 unsigned &Immr, unsigned &Imms,
2657 bool BiggerPattern) {
2658 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2659 "N must be a SHR/SRA operation to call this function");
2660
2661 EVT VT = N->getValueType(0);
2662
2663 // Here we can test the type of VT and return false when the type does not
2664 // match, but since it is done prior to that call in the current context
2665 // we turned that into an assert to avoid redundant code.
2666 assert((VT == MVT::i32 || VT == MVT::i64) &&
2667 "Type checking must have been done before calling this function");
2668
2669 // Check for AND + SRL doing several bits extract.
2670 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2671 return true;
2672
2673 // We're looking for a shift of a shift.
2674 uint64_t ShlImm = 0;
2675 uint64_t TruncBits = 0;
2676 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2677 Opd0 = N->getOperand(0).getOperand(0);
2678 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2679 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2680 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2681 // be considered as setting high 32 bits as zero. Our strategy here is to
2682 // always generate 64bit UBFM. This consistency will help the CSE pass
2683 // later find more redundancy.
2684 Opd0 = N->getOperand(0).getOperand(0);
2685 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2686 VT = Opd0.getValueType();
2687 assert(VT == MVT::i64 && "the promoted type should be i64");
2688 } else if (BiggerPattern) {
2689 // Let's pretend a 0 shift left has been performed.
2690 // FIXME: Currently we limit this to the bigger pattern case,
2691 // because some optimizations expect AND and not UBFM
2692 Opd0 = N->getOperand(0);
2693 } else
2694 return false;
2695
2696 // Missing combines/constant folding may have left us with strange
2697 // constants.
2698 if (ShlImm >= VT.getSizeInBits()) {
2699 LLVM_DEBUG(
2700 (dbgs() << N
2701 << ": Found large shift immediate, this should not happen\n"));
2702 return false;
2703 }
2704
2705 uint64_t SrlImm = 0;
2706 if (!isIntImmediate(N->getOperand(1), SrlImm))
2707 return false;
2708
2709 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2710 "bad amount in shift node!");
2711 int immr = SrlImm - ShlImm;
2712 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2713 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2714 // SRA requires a signed extraction
2715 if (VT == MVT::i32)
2716 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2717 else
2718 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2719 return true;
2720}
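// Editorial example (not part of the LLVM source): for i32
// N = (sra (shl x, 24), 24), ShlImm = SrlImm = 24, so Immr = 24 - 24 = 0 and
// Imms = 32 - 24 - 0 - 1 = 7, selecting SBFMWri x, #0, #7, which is exactly a
// sign-extend of the low byte (SXTB).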
2721
2722bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2723 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2724
2725 EVT VT = N->getValueType(0);
2726 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2727 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2728 return false;
2729
2730 uint64_t ShiftImm;
2731 SDValue Op = N->getOperand(0);
2732 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2733 return false;
2734
2735 SDLoc dl(N);
2736 // Extend the incoming operand of the shift to 64-bits.
2737 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2738 unsigned Immr = ShiftImm;
2739 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2740 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2741 CurDAG->getTargetConstant(Imms, dl, VT)};
2742 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2743 return true;
2744}
2745
2746static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2747 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2748 unsigned NumberOfIgnoredLowBits = 0,
2749 bool BiggerPattern = false) {
2750 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2751 return false;
2752
2753 switch (N->getOpcode()) {
2754 default:
2755 if (!N->isMachineOpcode())
2756 return false;
2757 break;
2758 case ISD::AND:
2759 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2760 NumberOfIgnoredLowBits, BiggerPattern);
2761 case ISD::SRL:
2762 case ISD::SRA:
2763 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2764
2765 case ISD::SIGN_EXTEND_INREG:
2766 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2767 }
2768
2769 unsigned NOpc = N->getMachineOpcode();
2770 switch (NOpc) {
2771 default:
2772 return false;
2773 case AArch64::SBFMWri:
2774 case AArch64::UBFMWri:
2775 case AArch64::SBFMXri:
2776 case AArch64::UBFMXri:
2777 Opc = NOpc;
2778 Opd0 = N->getOperand(0);
2779 Immr = N->getConstantOperandVal(1);
2780 Imms = N->getConstantOperandVal(2);
2781 return true;
2782 }
2783 // Unreachable
2784 return false;
2785}
2786
2787bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2788 unsigned Opc, Immr, Imms;
2789 SDValue Opd0;
2790 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2791 return false;
2792
2793 EVT VT = N->getValueType(0);
2794 SDLoc dl(N);
2795
2796 // If the bit extract operation is 64bit but the original type is 32bit, we
2797 // need to add one EXTRACT_SUBREG.
2798 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2799 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2800 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2801
2802 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2803 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2804 MVT::i32, SDValue(BFM, 0));
2805 ReplaceNode(N, Inner.getNode());
2806 return true;
2807 }
2808
2809 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2810 CurDAG->getTargetConstant(Imms, dl, VT)};
2811 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2812 return true;
2813}
2814
2815/// Does DstMask form a complementary pair with the mask provided by
2816/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2817/// this asks whether DstMask zeroes precisely those bits that will be set by
2818/// the other half.
2819static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2820 unsigned NumberOfIgnoredHighBits, EVT VT) {
2821 assert((VT == MVT::i32 || VT == MVT::i64) &&
2822 "i32 or i64 mask type expected!");
2823 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2824
2825 // Enable implicitTrunc as we're intentionally ignoring high bits.
2826 APInt SignificantDstMask =
2827 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2828 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2829
2830 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2831 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2832}
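// Editorial example (not part of the LLVM source): with VT = i32,
// DstMask = 0xffff0000 and BitsToBeInserted = 0x0000ffff, the two masks AND to
// zero and OR to all ones, so they form a complementary pair and a BFI/BFXIL
// can merge the two halves without losing any bits.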
2833
2834// Look for bits that will be useful for later uses.
2835// A bit is considered useless as soon as it is dropped and never used
2836// before it has been dropped.
2837// E.g., looking for the useful bits of x:
2838// 1. y = x & 0x7
2839// 2. z = y >> 2
2840// After #1, the useful bits of x are 0x7; these useful bits then live
2841// through y.
2842// After #2, the useful bits of x are 0x4.
2843// However, if x is used by an unpredictable instruction, then all its bits
2844// are useful.
2845// E.g.
2846// 1. y = x & 0x7
2847// 2. z = y >> 2
2848// 3. str x, [@x]
2849static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2850
2851 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2852 unsigned Depth) {
2853 uint64_t Imm =
2854 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2855 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2856 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2857 getUsefulBits(Op, UsefulBits, Depth + 1);
2858}
2859
2860 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2861 uint64_t Imm, uint64_t MSB,
2862 unsigned Depth) {
2863 // inherit the bitwidth value
2864 APInt OpUsefulBits(UsefulBits);
2865 OpUsefulBits = 1;
2866
2867 if (MSB >= Imm) {
2868 OpUsefulBits <<= MSB - Imm + 1;
2869 --OpUsefulBits;
2870 // The interesting part will be in the lower part of the result
2871 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2872 // The interesting part was starting at Imm in the argument
2873 OpUsefulBits <<= Imm;
2874 } else {
2875 OpUsefulBits <<= MSB + 1;
2876 --OpUsefulBits;
2877 // The interesting part will be shifted in the result
2878 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2879 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2880 // The interesting part was at zero in the argument
2881 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2882 }
2883
2884 UsefulBits &= OpUsefulBits;
2885}
2886
2887static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2888 unsigned Depth) {
2889 uint64_t Imm =
2890 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2891 uint64_t MSB =
2892 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2893
2894 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2895}
2896
2897 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2898 unsigned Depth) {
2899 uint64_t ShiftTypeAndValue =
2900 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2901 APInt Mask(UsefulBits);
2902 Mask.clearAllBits();
2903 Mask.flipAllBits();
2904
2905 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2906 // Shift Left
2907 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2908 Mask <<= ShiftAmt;
2909 getUsefulBits(Op, Mask, Depth + 1);
2910 Mask.lshrInPlace(ShiftAmt);
2911 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2912 // Shift Right
2913 // We do not handle AArch64_AM::ASR, because the sign will change the
2914 // number of useful bits
2915 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2916 Mask.lshrInPlace(ShiftAmt);
2917 getUsefulBits(Op, Mask, Depth + 1);
2918 Mask <<= ShiftAmt;
2919 } else
2920 return;
2921
2922 UsefulBits &= Mask;
2923}
2924
2925static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2926 unsigned Depth) {
2927 uint64_t Imm =
2928 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2929 uint64_t MSB =
2930 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2931
2932 APInt OpUsefulBits(UsefulBits);
2933 OpUsefulBits = 1;
2934
2935 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2936 ResultUsefulBits.flipAllBits();
2937 APInt Mask(UsefulBits.getBitWidth(), 0);
2938
2939 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2940
2941 if (MSB >= Imm) {
2942 // The instruction is a BFXIL.
2943 uint64_t Width = MSB - Imm + 1;
2944 uint64_t LSB = Imm;
2945
2946 OpUsefulBits <<= Width;
2947 --OpUsefulBits;
2948
2949 if (Op.getOperand(1) == Orig) {
2950 // Copy the low bits from the result to bits starting from LSB.
2951 Mask = ResultUsefulBits & OpUsefulBits;
2952 Mask <<= LSB;
2953 }
2954
2955 if (Op.getOperand(0) == Orig)
2956 // Bits starting from LSB in the input contribute to the result.
2957 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2958 } else {
2959 // The instruction is a BFI.
2960 uint64_t Width = MSB + 1;
2961 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2962
2963 OpUsefulBits <<= Width;
2964 --OpUsefulBits;
2965 OpUsefulBits <<= LSB;
2966
2967 if (Op.getOperand(1) == Orig) {
2968 // Copy the bits from the result to the zero bits.
2969 Mask = ResultUsefulBits & OpUsefulBits;
2970 Mask.lshrInPlace(LSB);
2971 }
2972
2973 if (Op.getOperand(0) == Orig)
2974 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2975 }
2976
2977 UsefulBits &= Mask;
2978}
2979
2980static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2981 SDValue Orig, unsigned Depth) {
2982
2983 // Users of this node should have already been instruction selected
2984 // FIXME: Can we turn that into an assert?
2985 if (!UserNode->isMachineOpcode())
2986 return;
2987
2988 switch (UserNode->getMachineOpcode()) {
2989 default:
2990 return;
2991 case AArch64::ANDSWri:
2992 case AArch64::ANDSXri:
2993 case AArch64::ANDWri:
2994 case AArch64::ANDXri:
2995 // We increment Depth only when we call the getUsefulBits
2996 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2997 Depth);
2998 case AArch64::UBFMWri:
2999 case AArch64::UBFMXri:
3000 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3001
3002 case AArch64::ORRWrs:
3003 case AArch64::ORRXrs:
3004 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3005 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3006 Depth);
3007 return;
3008 case AArch64::BFMWri:
3009 case AArch64::BFMXri:
3010 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3011
3012 case AArch64::STRBBui:
3013 case AArch64::STURBBi:
3014 if (UserNode->getOperand(0) != Orig)
3015 return;
3016 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3017 return;
3018
3019 case AArch64::STRHHui:
3020 case AArch64::STURHHi:
3021 if (UserNode->getOperand(0) != Orig)
3022 return;
3023 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3024 return;
3025 }
3026}
3027
3028static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3029 if (Depth >= SelectionDAG::MaxRecursionDepth)
3030 return;
3031 // Initialize UsefulBits
3032 if (!Depth) {
3033 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3034 // At the beginning, assume every produced bits is useful
3035 UsefulBits = APInt(Bitwidth, 0);
3036 UsefulBits.flipAllBits();
3037 }
3038 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3039
3040 for (SDNode *Node : Op.getNode()->users()) {
3041 // A use cannot produce useful bits
3042 APInt UsefulBitsForUse = APInt(UsefulBits);
3043 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3044 UsersUsefulBits |= UsefulBitsForUse;
3045 }
3046 // UsefulBits contains the produced bits that are meaningful for the
3047 // current definition, thus a user cannot make a bit meaningful at
3048 // this point
3049 UsefulBits &= UsersUsefulBits;
3050}
3051
3052/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3053/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3054/// 0, return Op unchanged.
3055static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3056 if (ShlAmount == 0)
3057 return Op;
3058
3059 EVT VT = Op.getValueType();
3060 SDLoc dl(Op);
3061 unsigned BitWidth = VT.getSizeInBits();
3062 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3063
3064 SDNode *ShiftNode;
3065 if (ShlAmount > 0) {
3066 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3067 ShiftNode = CurDAG->getMachineNode(
3068 UBFMOpc, dl, VT, Op,
3069 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3070 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3071 } else {
3072 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3073 assert(ShlAmount < 0 && "expected right shift");
3074 int ShrAmount = -ShlAmount;
3075 ShiftNode = CurDAG->getMachineNode(
3076 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3077 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3078 }
3079
3080 return SDValue(ShiftNode, 0);
3081}
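// --- Editorial sketch (not part of the LLVM source) ---------------------------
// Concrete instances of the encodings used above, for 32-bit values:
//   LSL #3 == UBFM wD, wN, #29, #28   and   LSR #3 == UBFM wD, wN, #3, #31.
// The hypothetical helper below mirrors only the immediate computation for a
// non-zero notional shift amount; it is not LLVM API.
struct UBFMImms {
  unsigned Immr, Imms;
};
static UBFMImms ubfmShiftImms(unsigned BitWidth, int ShlAmount) {
  if (ShlAmount > 0) // LSL #A == UBFM #(BitWidth - A), #(BitWidth - 1 - A)
    return {BitWidth - unsigned(ShlAmount),
            BitWidth - 1 - unsigned(ShlAmount)};
  // LSR #A == UBFM #A, #(BitWidth - 1)
  return {unsigned(-ShlAmount), BitWidth - 1};
}
// --- End editorial sketch ------------------------------------------------------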
3082
3083// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3084 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3085 bool BiggerPattern,
3086 const uint64_t NonZeroBits,
3087 SDValue &Src, int &DstLSB,
3088 int &Width);
3089
3090// For bit-field-positioning pattern "(shl VAL, N)".
3091 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3092 bool BiggerPattern,
3093 const uint64_t NonZeroBits,
3094 SDValue &Src, int &DstLSB,
3095 int &Width);
3096
3097/// Does this tree qualify as an attempt to move a bitfield into position,
3098/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3099 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3100 bool BiggerPattern, SDValue &Src,
3101 int &DstLSB, int &Width) {
3102 EVT VT = Op.getValueType();
3103 unsigned BitWidth = VT.getSizeInBits();
3104 (void)BitWidth;
3105 assert(BitWidth == 32 || BitWidth == 64);
3106
3107 KnownBits Known = CurDAG->computeKnownBits(Op);
3108
3109 // Non-zero in the sense that they're not provably zero, which is the key
3110 // point if we want to use this value
3111 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3112 if (!isShiftedMask_64(NonZeroBits))
3113 return false;
3114
3115 switch (Op.getOpcode()) {
3116 default:
3117 break;
3118 case ISD::AND:
3119 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3120 NonZeroBits, Src, DstLSB, Width);
3121 case ISD::SHL:
3122 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3123 NonZeroBits, Src, DstLSB, Width);
3124 }
3125
3126 return false;
3127}
3128
3129 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3130 bool BiggerPattern,
3131 const uint64_t NonZeroBits,
3132 SDValue &Src, int &DstLSB,
3133 int &Width) {
3134 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3135
3136 EVT VT = Op.getValueType();
3137 assert((VT == MVT::i32 || VT == MVT::i64) &&
3138 "Caller guarantees VT is one of i32 or i64");
3139 (void)VT;
3140
3141 uint64_t AndImm;
3142 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3143 return false;
3144
3145 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3146 // 1) (AndImm & (1 << POS) == 0)
3147 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3148 //
3149 // 1) and 2) don't agree so something must be wrong (e.g., in
3150 // 'SelectionDAG::computeKnownBits')
3151 assert((~AndImm & NonZeroBits) == 0 &&
3152 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3153
3154 SDValue AndOp0 = Op.getOperand(0);
3155
3156 uint64_t ShlImm;
3157 SDValue ShlOp0;
3158 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3159 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3160 ShlOp0 = AndOp0.getOperand(0);
3161 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3162 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3163 ShlImm)) {
3164 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3165
3166 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3167 SDValue ShlVal = AndOp0.getOperand(0);
3168
3169 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3170 // expect VT to be MVT::i32.
3171 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3172
3173 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3174 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3175 } else
3176 return false;
3177
3178 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3179 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3180 // AndOp0+AND.
3181 if (!BiggerPattern && !AndOp0.hasOneUse())
3182 return false;
3183
3184 DstLSB = llvm::countr_zero(NonZeroBits);
3185 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3186
3187 // Bail out on large Width. This happens when no proper combining / constant
3188 // folding was performed.
3189 if (Width >= (int)VT.getSizeInBits()) {
3190 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3191 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3192 // "val".
3193 // If VT is i32, what Width >= 32 means:
3194 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3195 // demands at least 'Width' bits (after dag-combiner). This, together with
3196 // the `any_extend` Op (undefined higher bits), indicates a missed combination
3197 // when lowering the 'and' IR instruction to a machine IR instruction.
3198 LLVM_DEBUG(
3199 dbgs()
3200 << "Found large Width in bit-field-positioning -- this indicates no "
3201 "proper combining / constant folding was performed\n");
3202 return false;
3203 }
3204
3205 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3206 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3207 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3208 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3209 // which case it is not profitable to insert an extra shift.
3210 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3211 return false;
3212
3213 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3214 return true;
3215}
3216
3217// For node (shl (and val, mask), N), returns true if the node is equivalent to
3218// UBFIZ.
3219static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3220 SDValue &Src, int &DstLSB,
3221 int &Width) {
3222 // Caller should have verified that N is a left shift with constant shift
3223 // amount; asserts that.
3224 assert(Op.getOpcode() == ISD::SHL &&
3225 "Op.getNode() should be a SHL node to call this function");
3226 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3227 "Op.getNode() should shift ShlImm to call this function");
3228
3229 uint64_t AndImm = 0;
3230 SDValue Op0 = Op.getOperand(0);
3231 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3232 return false;
3233
3234 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3235 if (isMask_64(ShiftedAndImm)) {
3236 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3237 // should end with Mask, and could be prefixed with random bits if those
3238 // bits are shifted out.
3239 //
3240 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3241 // the AND result corresponding to those bits are shifted out, so it's fine
3242 // to not extract them.
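 // E.g., ShlImm == 8 and AndImm == 0xab000000000000ff: the 0xab byte is
 // shifted out, ShiftedAndImm == 0xff, so this matches with Width == 8 and
 // DstLSB == 8 (a UBFIZ of 8 bits at bit 8).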
3243 Width = llvm::countr_one(ShiftedAndImm);
3244 DstLSB = ShlImm;
3245 Src = Op0.getOperand(0);
3246 return true;
3247 }
3248 return false;
3249}
3250
3251static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3252 bool BiggerPattern,
3253 const uint64_t NonZeroBits,
3254 SDValue &Src, int &DstLSB,
3255 int &Width) {
3256 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3257
3258 EVT VT = Op.getValueType();
3259 assert((VT == MVT::i32 || VT == MVT::i64) &&
3260 "Caller guarantees that type is i32 or i64");
3261 (void)VT;
3262
3263 uint64_t ShlImm;
3264 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3265 return false;
3266
3267 if (!BiggerPattern && !Op.hasOneUse())
3268 return false;
3269
3270 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3271 return true;
3272
3273 DstLSB = llvm::countr_zero(NonZeroBits);
3274 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3275
3276 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3277 return false;
3278
3279 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3280 return true;
3281}
3282
3283static bool isShiftedMask(uint64_t Mask, EVT VT) {
3284 assert(VT == MVT::i32 || VT == MVT::i64);
3285 if (VT == MVT::i32)
3286 return isShiftedMask_32(Mask);
3287 return isShiftedMask_64(Mask);
3288}
3289
3290// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3291// inserted only sets known zero bits.
3292static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3293 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3294
3295 EVT VT = N->getValueType(0);
3296 if (VT != MVT::i32 && VT != MVT::i64)
3297 return false;
3298
3299 unsigned BitWidth = VT.getSizeInBits();
3300
3301 uint64_t OrImm;
3302 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3303 return false;
3304
3305 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3306 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3307 // performance neutral.
3308 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3309 return false;
3310
3311 uint64_t MaskImm;
3312 SDValue And = N->getOperand(0);
3313 // Must be a single use AND with an immediate operand.
3314 if (!And.hasOneUse() ||
3315 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3316 return false;
3317
3318 // Compute the Known Zero for the AND as this allows us to catch more general
3319 // cases than just looking for AND with imm.
3320 KnownBits Known = CurDAG->computeKnownBits(And);
3321
3322 // Non-zero in the sense that they're not provably zero, which is the key
3323 // point if we want to use this value.
3324 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3325
3326 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3327 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3328 return false;
3329
3330 // The bits being inserted must only set those bits that are known to be zero.
3331 if ((OrImm & NotKnownZero) != 0) {
3332 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3333 // currently handle this case.
3334 return false;
3335 }
3336
3337 // BFI/BFXIL dst, src, #lsb, #width.
3338 int LSB = llvm::countr_one(NotKnownZero);
3339 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3340
3341 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3342 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3343 unsigned ImmS = Width - 1;
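 // For example, for i32 with Known.Zero == 0x00ffff00: NotKnownZero ==
 // 0xff0000ff, LSB == 8, Width == 16, so ImmR == 24 and ImmS == 15,
 // i.e. a "bfi wd, wn, #8, #16".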
3344
3345 // If we're creating a BFI instruction avoid cases where we need more
3346 // instructions to materialize the BFI constant as compared to the original
3347 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3348 // should be no worse in this case.
3349 bool IsBFI = LSB != 0;
3350 uint64_t BFIImm = OrImm >> LSB;
3351 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3352 // We have a BFI instruction and we know the constant can't be materialized
3353 // with a ORR-immediate with the zero register.
3354 unsigned OrChunks = 0, BFIChunks = 0;
3355 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3356 if (((OrImm >> Shift) & 0xFFFF) != 0)
3357 ++OrChunks;
3358 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3359 ++BFIChunks;
3360 }
3361 if (BFIChunks > OrChunks)
3362 return false;
3363 }
3364
3365 // Materialize the constant to be inserted.
3366 SDLoc DL(N);
3367 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3368 SDNode *MOVI = CurDAG->getMachineNode(
3369 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3370
3371 // Create the BFI/BFXIL instruction.
3372 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3373 CurDAG->getTargetConstant(ImmR, DL, VT),
3374 CurDAG->getTargetConstant(ImmS, DL, VT)};
3375 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3376 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3377 return true;
3378}
3379
3380static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3381 SDValue &ShiftedOperand,
3382 uint64_t &EncodedShiftImm) {
3383 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3384 if (!Dst.hasOneUse())
3385 return false;
3386
3387 EVT VT = Dst.getValueType();
3388 assert((VT == MVT::i32 || VT == MVT::i64) &&
3389 "Caller should guarantee that VT is one of i32 or i64");
3390 const unsigned SizeInBits = VT.getSizeInBits();
3391
3392 SDLoc DL(Dst.getNode());
3393 uint64_t AndImm, ShlImm;
3394 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3395 isShiftedMask_64(AndImm)) {
3396 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3397 SDValue DstOp0 = Dst.getOperand(0);
3398 if (!DstOp0.hasOneUse())
3399 return false;
3400
3401 // An example to illustrate the transformation
3402 // From:
3403 // lsr x8, x1, #1
3404 // and x8, x8, #0x3f80
3405 // bfxil x8, x1, #0, #7
3406 // To:
3407 // and x8, x23, #0x7f
3408 // ubfx x9, x23, #8, #7
3409 // orr x23, x8, x9, lsl #7
3410 //
3411 // The number of instructions remains the same, but ORR is faster than BFXIL
3412 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3413 // the dependency chain is improved after the transformation.
3414 uint64_t SrlImm;
3415 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3416 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3417 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3418 unsigned MaskWidth =
3419 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3420 unsigned UBFMOpc =
3421 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3422 SDNode *UBFMNode = CurDAG->getMachineNode(
3423 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3424 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3425 VT),
3426 CurDAG->getTargetConstant(
3427 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3428 ShiftedOperand = SDValue(UBFMNode, 0);
3429 EncodedShiftImm = AArch64_AM::getShifterImm(
3430 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3431 return true;
3432 }
3433 }
3434 return false;
3435 }
3436
3437 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3438 ShiftedOperand = Dst.getOperand(0);
3439 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3440 return true;
3441 }
3442
3443 uint64_t SrlImm;
3444 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3445 ShiftedOperand = Dst.getOperand(0);
3446 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3447 return true;
3448 }
3449 return false;
3450}
3451
3452// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3453// the operands and select it to AArch64::ORR with shifted registers if
3454// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3455static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3456 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3457 const bool BiggerPattern) {
3458 EVT VT = N->getValueType(0);
3459 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3460 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3461 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3462 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3463 assert((VT == MVT::i32 || VT == MVT::i64) &&
3464 "Expect result type to be i32 or i64 since N is combinable to BFM");
3465 SDLoc DL(N);
3466
3467 // Bail out if BFM simplifies away one node in BFM Dst.
3468 if (OrOpd1 != Dst)
3469 return false;
3470
3471 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3472 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3473 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3474 if (BiggerPattern) {
3475 uint64_t SrcAndImm;
3476 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3477 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3478 // OrOpd0 = AND Src, #Mask
3479 // So BFM simplifies away one AND node from Src and doesn't simplify away
3480 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3481 // one node (from Rd), ORR is better since it has higher throughput and
3482 // smaller latency than BFM on many AArch64 processors (and for the rest
3483 // ORR is at least as good as BFM).
3484 SDValue ShiftedOperand;
3485 uint64_t EncodedShiftImm;
3486 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3487 EncodedShiftImm)) {
3488 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3489 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3490 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3491 return true;
3492 }
3493 }
3494 return false;
3495 }
3496
3497 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3498
3499 uint64_t ShlImm;
3500 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3501 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3502 SDValue Ops[] = {
3503 Dst, Src,
3504 CurDAG->getTargetConstant(
3505 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3506 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3507 return true;
3508 }
3509
3510 // Select the following pattern to left-shifted operand rather than BFI.
3511 // %val1 = op ..
3512 // %val2 = shl %val1, #imm
3513 // %res = or %val1, %val2
3514 //
3515 // If N is selected to be BFI, we know that
3516 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3517 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3518 //
3519 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
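 // E.g., "%res = or i64 %v, (shl %v, 3)" becomes "orr x0, x1, x1, lsl #3"
 // rather than a BFI that ties and overwrites a copy of %v.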
3520 if (OrOpd0.getOperand(0) == OrOpd1) {
3521 SDValue Ops[] = {
3522 OrOpd1, OrOpd1,
3523 CurDAG->getTargetConstant(
3524 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3525 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3526 return true;
3527 }
3528 }
3529
3530 uint64_t SrlImm;
3531 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3532 // Select the following pattern to right-shifted operand rather than BFXIL.
3533 // %val1 = op ..
3534 // %val2 = lshr %val1, #imm
3535 // %res = or %val1, %val2
3536 //
3537 // If N is selected to be BFXIL, we know that
3538 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3539 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3540 //
3541 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
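 // E.g., "%res = or i64 %v, (lshr %v, 7)" becomes "orr x0, x1, x1, lsr #7"
 // rather than a BFXIL.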
3542 if (OrOpd0.getOperand(0) == OrOpd1) {
3543 SDValue Ops[] = {
3544 OrOpd1, OrOpd1,
3545 CurDAG->getTargetConstant(
3546 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3547 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3548 return true;
3549 }
3550 }
3551
3552 return false;
3553}
3554
3555static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3556 SelectionDAG *CurDAG) {
3557 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3558
3559 EVT VT = N->getValueType(0);
3560 if (VT != MVT::i32 && VT != MVT::i64)
3561 return false;
3562
3563 unsigned BitWidth = VT.getSizeInBits();
3564
3565 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3566 // have the expected shape. Try to undo that.
3567
3568 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3569 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3570
3571 // Given a OR operation, check if we have the following pattern
3572 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3573 // isBitfieldExtractOp)
3574 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3575 // countTrailingZeros(mask2) == imm2 - imm + 1
3576 // f = d | c
3577 // if yes, replace the OR instruction with:
3578 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
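 // For example, with imm = 4 and imm2 = 11 on i32: c = UBFX b, #4, #8,
 // d = e & 0xffffff00 (countTrailingZeros == 8 == imm2 - imm + 1), and
 // f = d | c selects to "bfxil e, b, #4, #8".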
3579
3580 // OR is commutative, check all combinations of operand order and values of
3581 // BiggerPattern, i.e.
3582 // Opd0, Opd1, BiggerPattern=false
3583 // Opd1, Opd0, BiggerPattern=false
3584 // Opd0, Opd1, BiggerPattern=true
3585 // Opd1, Opd0, BiggerPattern=true
3586 // Several of these combinations may match, so check with BiggerPattern=false
3587 // first since that will produce better results by matching more instructions
3588 // and/or inserting fewer extra instructions.
3589 for (int I = 0; I < 4; ++I) {
3590
3591 SDValue Dst, Src;
3592 unsigned ImmR, ImmS;
3593 bool BiggerPattern = I / 2;
3594 SDValue OrOpd0Val = N->getOperand(I % 2);
3595 SDNode *OrOpd0 = OrOpd0Val.getNode();
3596 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3597 SDNode *OrOpd1 = OrOpd1Val.getNode();
3598
3599 unsigned BFXOpc;
3600 int DstLSB, Width;
3601 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3602 NumberOfIgnoredLowBits, BiggerPattern)) {
3603 // Check that the returned opcode is compatible with the pattern,
3604 // i.e., same type and zero extended (U and not S)
3605 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3606 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3607 continue;
3608
3609 // Compute the width of the bitfield insertion
3610 DstLSB = 0;
3611 Width = ImmS - ImmR + 1;
3612 // FIXME: This constraint is to catch bitfield insertion; we may
3613 // want to widen the pattern if we want to grab the general bitfield
3614 // move case.
3615 if (Width <= 0)
3616 continue;
3617
3618 // If the mask on the insertee is correct, we have a BFXIL operation. We
3619 // can share the ImmR and ImmS values from the already-computed UBFM.
3620 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3621 BiggerPattern,
3622 Src, DstLSB, Width)) {
3623 ImmR = (BitWidth - DstLSB) % BitWidth;
3624 ImmS = Width - 1;
3625 } else
3626 continue;
3627
3628 // Check the second part of the pattern
3629 EVT VT = OrOpd1Val.getValueType();
3630 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3631
3632 // Compute the Known Zero for the candidate of the first operand.
3633 // This allows us to catch more general cases than just looking for
3634 // AND with imm. Indeed, simplify-demanded-bits may have removed
3635 // the AND instruction because it proves it was useless.
3636 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3637
3638 // Check if there is enough room for the second operand to appear
3639 // in the first one
3640 APInt BitsToBeInserted =
3641 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3642
3643 if ((BitsToBeInserted & ~Known.Zero) != 0)
3644 continue;
3645
3646 // Set the first operand
3647 uint64_t Imm;
3648 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3649 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3650 // In that case, we can eliminate the AND
3651 Dst = OrOpd1->getOperand(0);
3652 else
3653 // Maybe the AND has been removed by simplify-demanded-bits
3654 // or is useful because it discards more bits
3655 Dst = OrOpd1Val;
3656
3657 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3658 // with shifted operand is more efficient.
3659 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3660 BiggerPattern))
3661 return true;
3662
3663 // both parts match
3664 SDLoc DL(N);
3665 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3666 CurDAG->getTargetConstant(ImmS, DL, VT)};
3667 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3668 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3669 return true;
3670 }
3671
3672 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3673 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3674 // mask (e.g., 0x000ffff0).
3675 uint64_t Mask0Imm, Mask1Imm;
3676 SDValue And0 = N->getOperand(0);
3677 SDValue And1 = N->getOperand(1);
3678 if (And0.hasOneUse() && And1.hasOneUse() &&
3679 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3680 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3681 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3682 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3683
3684 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3685 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3686 // bits to be inserted.
3687 if (isShiftedMask(Mask0Imm, VT)) {
3688 std::swap(And0, And1);
3689 std::swap(Mask0Imm, Mask1Imm);
3690 }
3691
3692 SDValue Src = And1->getOperand(0);
3693 SDValue Dst = And0->getOperand(0);
3694 unsigned LSB = llvm::countr_zero(Mask1Imm);
3695 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3696
3697 // The BFXIL inserts the low-order bits from a source register, so right
3698 // shift the needed bits into place.
3699 SDLoc DL(N);
3700 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3701 uint64_t LsrImm = LSB;
3702 if (Src->hasOneUse() &&
3703 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3704 (LsrImm + LSB) < BitWidth) {
3705 Src = Src->getOperand(0);
3706 LsrImm += LSB;
3707 }
3708
3709 SDNode *LSR = CurDAG->getMachineNode(
3710 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3711 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3712
3713 // BFXIL is an alias of BFM, so translate to BFM operands.
3714 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3715 unsigned ImmS = Width - 1;
3716
3717 // Create the BFXIL instruction.
3718 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3719 CurDAG->getTargetConstant(ImmR, DL, VT),
3720 CurDAG->getTargetConstant(ImmS, DL, VT)};
3721 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3722 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3723 return true;
3724 }
3725
3726 return false;
3727}
3728
3729bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3730 if (N->getOpcode() != ISD::OR)
3731 return false;
3732
3733 APInt NUsefulBits;
3734 getUsefulBits(SDValue(N, 0), NUsefulBits);
3735
3736 // If none of the bits are useful, just return UNDEF.
3737 if (!NUsefulBits) {
3738 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3739 return true;
3740 }
3741
3742 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3743 return true;
3744
3745 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3746}
3747
3748/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3749/// equivalent of a left shift by a constant amount followed by an and masking
3750/// out a contiguous set of bits.
3751bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3752 if (N->getOpcode() != ISD::AND)
3753 return false;
3754
3755 EVT VT = N->getValueType(0);
3756 if (VT != MVT::i32 && VT != MVT::i64)
3757 return false;
3758
3759 SDValue Op0;
3760 int DstLSB, Width;
3761 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3762 Op0, DstLSB, Width))
3763 return false;
3764
3765 // ImmR is the rotate right amount.
3766 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3767 // ImmS is the most significant bit of the source to be moved.
3768 unsigned ImmS = Width - 1;
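 // For example, i32 "(and (shl x, 8), 0x00ffff00)" gives DstLSB == 8 and
 // Width == 16, so ImmR == 24 and ImmS == 15; UBFM wd, wn, #24, #15 is
 // "ubfiz wd, wn, #8, #16".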
3769
3770 SDLoc DL(N);
3771 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3772 CurDAG->getTargetConstant(ImmS, DL, VT)};
3773 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3774 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3775 return true;
3776}
3777
3778/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3779/// variable shift/rotate instructions.
3780bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3781 EVT VT = N->getValueType(0);
3782
3783 unsigned Opc;
3784 switch (N->getOpcode()) {
3785 case ISD::ROTR:
3786 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3787 break;
3788 case ISD::SHL:
3789 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3790 break;
3791 case ISD::SRL:
3792 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3793 break;
3794 case ISD::SRA:
3795 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3796 break;
3797 default:
3798 return false;
3799 }
3800
3801 uint64_t Size;
3802 uint64_t Bits;
3803 if (VT == MVT::i32) {
3804 Bits = 5;
3805 Size = 32;
3806 } else if (VT == MVT::i64) {
3807 Bits = 6;
3808 Size = 64;
3809 } else
3810 return false;
3811
3812 SDValue ShiftAmt = N->getOperand(1);
3813 SDLoc DL(N);
3814 SDValue NewShiftAmt;
3815
3816 // Skip over an extend of the shift amount.
3817 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3818 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3819 ShiftAmt = ShiftAmt->getOperand(0);
3820
3821 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3822 SDValue Add0 = ShiftAmt->getOperand(0);
3823 SDValue Add1 = ShiftAmt->getOperand(1);
3824 uint64_t Add0Imm;
3825 uint64_t Add1Imm;
3826 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3827 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3828 // to avoid the ADD/SUB.
3829 NewShiftAmt = Add0;
3830 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3831 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3832 (Add0Imm % Size == 0)) {
3833 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3834 // to generate a NEG instead of a SUB from a constant.
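 // E.g., "x << (64 - y)" on i64: 64 % Size == 0, so the amount becomes a NEG
 // of y (SUB from the zero register); LSLV's implicit modulo-64 of the shift
 // amount keeps the result identical.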
3835 unsigned NegOpc;
3836 unsigned ZeroReg;
3837 EVT SubVT = ShiftAmt->getValueType(0);
3838 if (SubVT == MVT::i32) {
3839 NegOpc = AArch64::SUBWrr;
3840 ZeroReg = AArch64::WZR;
3841 } else {
3842 assert(SubVT == MVT::i64);
3843 NegOpc = AArch64::SUBXrr;
3844 ZeroReg = AArch64::XZR;
3845 }
3846 SDValue Zero =
3847 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3848 MachineSDNode *Neg =
3849 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3850 NewShiftAmt = SDValue(Neg, 0);
3851 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3852 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3853 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3854 // to generate a NOT instead of a SUB from a constant.
3855 unsigned NotOpc;
3856 unsigned ZeroReg;
3857 EVT SubVT = ShiftAmt->getValueType(0);
3858 if (SubVT == MVT::i32) {
3859 NotOpc = AArch64::ORNWrr;
3860 ZeroReg = AArch64::WZR;
3861 } else {
3862 assert(SubVT == MVT::i64);
3863 NotOpc = AArch64::ORNXrr;
3864 ZeroReg = AArch64::XZR;
3865 }
3866 SDValue Zero =
3867 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3868 MachineSDNode *Not =
3869 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3870 NewShiftAmt = SDValue(Not, 0);
3871 } else
3872 return false;
3873 } else {
3874 // If the shift amount is masked with an AND, check that the mask covers the
3875 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3876 // the AND.
3877 uint64_t MaskImm;
3878 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3879 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3880 return false;
3881
3882 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3883 return false;
3884
3885 NewShiftAmt = ShiftAmt->getOperand(0);
3886 }
3887
3888 // Narrow/widen the shift amount to match the size of the shift operation.
3889 if (VT == MVT::i32)
3890 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3891 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3892 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3893 MachineSDNode *Ext = CurDAG->getMachineNode(
3894 AArch64::SUBREG_TO_REG, DL, VT,
3895 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3896 NewShiftAmt = SDValue(Ext, 0);
3897 }
3898
3899 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3900 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3901 return true;
3902}
3903
3904static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3905 SDValue &FixedPos,
3906 unsigned RegWidth,
3907 bool isReciprocal) {
3908 APFloat FVal(0.0);
3909 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3910 FVal = CN->getValueAPF();
3911 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3912 // Some otherwise illegal constants are allowed in this case.
3913 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3914 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3915 return false;
3916
3917 ConstantPoolSDNode *CN =
3918 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3919 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3920 } else
3921 return false;
3922
3923 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3924 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3925 // x-register.
3926 //
3927 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3928 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3929 // integers.
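 // For example, (fp_to_sint (fmul f, 65536.0)) with a w-register destination
 // gives FBits == 16, allowing a single "fcvtzs w0, s0, #16".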
3930 bool IsExact;
3931
3932 if (isReciprocal)
3933 if (!FVal.getExactInverse(&FVal))
3934 return false;
3935
3936 // fbits is between 1 and 64 in the worst-case, which means the fmul
3937 // could have 2^64 as an actual operand. Need 65 bits of precision.
3938 APSInt IntVal(65, true);
3939 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3940
3941 // N.b. isPowerOf2 also checks for > 0.
3942 if (!IsExact || !IntVal.isPowerOf2())
3943 return false;
3944 unsigned FBits = IntVal.logBase2();
3945
3946 // Checks above should have guaranteed that we haven't lost information in
3947 // finding FBits, but it must still be in range.
3948 if (FBits == 0 || FBits > RegWidth) return false;
3949
3950 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3951 return true;
3952}
3953
3954bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3955 unsigned RegWidth) {
3956 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3957 false);
3958}
3959
3960bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3961 SDValue &FixedPos,
3962 unsigned RegWidth) {
3963 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3964 true);
3965}
3966
3967// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3968// of the string, obtains the integer values from them, and combines these
3969// into a single value to be used in the MRS/MSR instruction.
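// For example, "3:3:13:0:2" (TPIDR_EL0) yields
// (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 0xde82.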
3970static int getIntOperandFromRegisterString(StringRef RegString) {
3971 SmallVector<StringRef, 5> Fields;
3972 RegString.split(Fields, ':');
3973
3974 if (Fields.size() == 1)
3975 return -1;
3976
3977 assert(Fields.size() == 5
3978 && "Invalid number of fields in read register string");
3979
3980 SmallVector<int, 5> Ops;
3981 bool AllIntFields = true;
3982
3983 for (StringRef Field : Fields) {
3984 unsigned IntField;
3985 AllIntFields &= !Field.getAsInteger(10, IntField);
3986 Ops.push_back(IntField);
3987 }
3988
3989 assert(AllIntFields &&
3990 "Unexpected non-integer value in special register string.");
3991 (void)AllIntFields;
3992
3993 // Need to combine the integer fields of the string into a single value
3994 // based on the bit encoding of MRS/MSR instruction.
3995 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3996 (Ops[3] << 3) | (Ops[4]);
3997}
3998
3999// Lower the read_register intrinsic to an MRS instruction node if the special
4000// register string argument is either of the form detailed in the ALCE (the
4001// form described in getIntOperandFromRegisterString) or is a named register
4002// known by the MRS SysReg mapper.
4003bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4004 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4005 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4006 SDLoc DL(N);
4007
4008 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4009
4010 unsigned Opcode64Bit = AArch64::MRS;
4011 int Imm = getIntOperandFromRegisterString(RegString->getString());
4012 if (Imm == -1) {
4013 // No match, Use the sysreg mapper to map the remaining possible strings to
4014 // the value for the register to be used for the instruction operand.
4015 const auto *TheReg =
4016 AArch64SysReg::lookupSysRegByName(RegString->getString());
4017 if (TheReg && TheReg->Readable &&
4018 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4019 Imm = TheReg->Encoding;
4020 else
4021 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4022
4023 if (Imm == -1) {
4024 // Still no match, see if this is "pc" or give up.
4025 if (!ReadIs128Bit && RegString->getString() == "pc") {
4026 Opcode64Bit = AArch64::ADR;
4027 Imm = 0;
4028 } else {
4029 return false;
4030 }
4031 }
4032 }
4033
4034 SDValue InChain = N->getOperand(0);
4035 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4036 if (!ReadIs128Bit) {
4037 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4038 {SysRegImm, InChain});
4039 } else {
4040 SDNode *MRRS = CurDAG->getMachineNode(
4041 AArch64::MRRS, DL,
4042 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4043 {SysRegImm, InChain});
4044
4045 // Sysregs are not endian. The even register always contains the low half
4046 // of the register.
4047 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4048 SDValue(MRRS, 0));
4049 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4050 SDValue(MRRS, 0));
4051 SDValue OutChain = SDValue(MRRS, 1);
4052
4053 ReplaceUses(SDValue(N, 0), Lo);
4054 ReplaceUses(SDValue(N, 1), Hi);
4055 ReplaceUses(SDValue(N, 2), OutChain);
4056 };
4057 return true;
4058}
4059
4060// Lower the write_register intrinsic to an MSR instruction node if the special
4061// register string argument is either of the form detailed in the ALCE (the
4062// form described in getIntOperandFromRegisterString) or is a named register
4063// known by the MSR SysReg mapper.
4064bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4065 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4066 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4067 SDLoc DL(N);
4068
4069 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4070
4071 if (!WriteIs128Bit) {
4072 // Check if the register was one of those allowed as the pstatefield value
4073 // in the MSR (immediate) instruction. To accept the values allowed in the
4074 // pstatefield for the MSR (immediate) instruction, we also require that an
4075 // immediate value has been provided as an argument; we know that this is
4076 // the case as it has been ensured by semantic checking.
4077 auto trySelectPState = [&](auto PMapper, unsigned State) {
4078 if (PMapper) {
4079 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4080 "Expected a constant integer expression.");
4081 unsigned Reg = PMapper->Encoding;
4082 uint64_t Immed = N->getConstantOperandVal(2);
4083 CurDAG->SelectNodeTo(
4084 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4085 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4086 return true;
4087 }
4088 return false;
4089 };
4090
4091 if (trySelectPState(
4092 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4093 AArch64::MSRpstateImm4))
4094 return true;
4095 if (trySelectPState(
4096 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4097 AArch64::MSRpstateImm1))
4098 return true;
4099 }
4100
4101 int Imm = getIntOperandFromRegisterString(RegString->getString());
4102 if (Imm == -1) {
4103 // Use the sysreg mapper to attempt to map the remaining possible strings
4104 // to the value for the register to be used for the MSR (register)
4105 // instruction operand.
4106 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4107 if (TheReg && TheReg->Writeable &&
4108 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4109 Imm = TheReg->Encoding;
4110 else
4111 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4112
4113 if (Imm == -1)
4114 return false;
4115 }
4116
4117 SDValue InChain = N->getOperand(0);
4118 if (!WriteIs128Bit) {
4119 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4120 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4121 N->getOperand(2), InChain);
4122 } else {
4123 // No endian swap. The lower half always goes into the even subreg, and the
4124 // higher half always into the odd subreg.
4125 SDNode *Pair = CurDAG->getMachineNode(
4126 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4127 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4128 MVT::i32),
4129 N->getOperand(2),
4130 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4131 N->getOperand(3),
4132 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4133
4134 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4135 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4136 SDValue(Pair, 0), InChain);
4137 }
4138
4139 return true;
4140}
4141
4142/// We've got special pseudo-instructions for these
4143bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4144 unsigned Opcode;
4145 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4146
4147 // Leave IR for LSE if subtarget supports it.
4148 if (Subtarget->hasLSE()) return false;
4149
4150 if (MemTy == MVT::i8)
4151 Opcode = AArch64::CMP_SWAP_8;
4152 else if (MemTy == MVT::i16)
4153 Opcode = AArch64::CMP_SWAP_16;
4154 else if (MemTy == MVT::i32)
4155 Opcode = AArch64::CMP_SWAP_32;
4156 else if (MemTy == MVT::i64)
4157 Opcode = AArch64::CMP_SWAP_64;
4158 else
4159 llvm_unreachable("Unknown AtomicCmpSwap type");
4160
4161 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4162 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4163 N->getOperand(0)};
4164 SDNode *CmpSwap = CurDAG->getMachineNode(
4165 Opcode, SDLoc(N),
4166 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4167
4168 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4169 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4170
4171 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4172 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4173 CurDAG->RemoveDeadNode(N);
4174
4175 return true;
4176}
4177
4178bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4179 SDValue &Shift) {
4180 if (!isa<ConstantSDNode>(N))
4181 return false;
4182
4183 SDLoc DL(N);
4184 uint64_t Val = cast<ConstantSDNode>(N)
4185 ->getAPIntValue()
4186 .trunc(VT.getFixedSizeInBits())
4187 .getZExtValue();
4188
4189 switch (VT.SimpleTy) {
4190 case MVT::i8:
4191 // All immediates are supported.
4192 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4193 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4194 return true;
4195 case MVT::i16:
4196 case MVT::i32:
4197 case MVT::i64:
4198 // Support 8bit unsigned immediates.
4199 if (Val <= 255) {
4200 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4201 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4202 return true;
4203 }
4204 // Support 16bit unsigned immediates that are a multiple of 256.
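 // E.g., 0x4500 is selected as Imm == 0x45 with Shift == 8
 // (an "#0x45, lsl #8" operand).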
4205 if (Val <= 65280 && Val % 256 == 0) {
4206 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4207 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4208 return true;
4209 }
4210 break;
4211 default:
4212 break;
4213 }
4214
4215 return false;
4216}
4217
4218bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4219 SDValue &Imm, SDValue &Shift,
4220 bool Negate) {
4221 if (!isa<ConstantSDNode>(N))
4222 return false;
4223
4224 SDLoc DL(N);
4225 int64_t Val = cast<ConstantSDNode>(N)
4226 ->getAPIntValue()
4227 .trunc(VT.getFixedSizeInBits())
4228 .getSExtValue();
4229
4230 if (Negate)
4231 Val = -Val;
4232
4233 // Signed saturating instructions treat their immediate operand as unsigned,
4234 // whereas the related intrinsics define their operands to be signed. This
4235 // means we can only use the immediate form when the operand is non-negative.
4236 if (Val < 0)
4237 return false;
4238
4239 switch (VT.SimpleTy) {
4240 case MVT::i8:
4241 // All positive immediates are supported.
4242 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4243 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4244 return true;
4245 case MVT::i16:
4246 case MVT::i32:
4247 case MVT::i64:
4248 // Support 8bit positive immediates.
4249 if (Val <= 255) {
4250 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4251 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4252 return true;
4253 }
4254 // Support 16bit positive immediates that are a multiple of 256.
4255 if (Val <= 65280 && Val % 256 == 0) {
4256 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4257 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4258 return true;
4259 }
4260 break;
4261 default:
4262 break;
4263 }
4264
4265 return false;
4266}
4267
4268bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4269 SDValue &Shift) {
4270 if (!isa<ConstantSDNode>(N))
4271 return false;
4272
4273 SDLoc DL(N);
4274 int64_t Val = cast<ConstantSDNode>(N)
4275 ->getAPIntValue()
4276 .trunc(VT.getFixedSizeInBits())
4277 .getSExtValue();
4278
4279 switch (VT.SimpleTy) {
4280 case MVT::i8:
4281 // All immediates are supported.
4282 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4283 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4284 return true;
4285 case MVT::i16:
4286 case MVT::i32:
4287 case MVT::i64:
4288 // Support 8bit signed immediates.
4289 if (Val >= -128 && Val <= 127) {
4290 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4291 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4292 return true;
4293 }
4294 // Support 16bit signed immediates that are a multiple of 256.
4295 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4296 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4297 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4298 return true;
4299 }
4300 break;
4301 default:
4302 break;
4303 }
4304
4305 return false;
4306}
4307
4308bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4309 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4310 int64_t ImmVal = CNode->getSExtValue();
4311 SDLoc DL(N);
4312 if (ImmVal >= -128 && ImmVal < 128) {
4313 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4314 return true;
4315 }
4316 }
4317 return false;
4318}
4319
4320bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4321 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4322 uint64_t ImmVal = CNode->getZExtValue();
4323
4324 switch (VT.SimpleTy) {
4325 case MVT::i8:
4326 ImmVal &= 0xFF;
4327 break;
4328 case MVT::i16:
4329 ImmVal &= 0xFFFF;
4330 break;
4331 case MVT::i32:
4332 ImmVal &= 0xFFFFFFFF;
4333 break;
4334 case MVT::i64:
4335 break;
4336 default:
4337 llvm_unreachable("Unexpected type");
4338 }
4339
4340 if (ImmVal < 256) {
4341 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4342 return true;
4343 }
4344 }
4345 return false;
4346}
4347
4348bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4349 bool Invert) {
4350 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4351 uint64_t ImmVal = CNode->getZExtValue();
4352 SDLoc DL(N);
4353
4354 if (Invert)
4355 ImmVal = ~ImmVal;
4356
4357 // Shift mask depending on type size.
4358 switch (VT.SimpleTy) {
4359 case MVT::i8:
4360 ImmVal &= 0xFF;
4361 ImmVal |= ImmVal << 8;
4362 ImmVal |= ImmVal << 16;
4363 ImmVal |= ImmVal << 32;
4364 break;
4365 case MVT::i16:
4366 ImmVal &= 0xFFFF;
4367 ImmVal |= ImmVal << 16;
4368 ImmVal |= ImmVal << 32;
4369 break;
4370 case MVT::i32:
4371 ImmVal &= 0xFFFFFFFF;
4372 ImmVal |= ImmVal << 32;
4373 break;
4374 case MVT::i64:
4375 break;
4376 default:
4377 llvm_unreachable("Unexpected type");
4378 }
4379
4380 uint64_t encoding;
4381 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4382 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4383 return true;
4384 }
4385 }
4386 return false;
4387}
4388
4389// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4390// Rather than attempt to normalise everything we can sometimes saturate the
4391// shift amount during selection. This function also allows for consistent
4392// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4393// required by the instructions.
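// For example, with Low == 1 and High == 32 (a right shift on 32-bit
// elements): an amount of 40 is clamped to 32 when AllowSaturation is true
// and rejected otherwise, while an amount of 0 is always rejected.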
4394bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4395 uint64_t High, bool AllowSaturation,
4396 SDValue &Imm) {
4397 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4398 uint64_t ImmVal = CN->getZExtValue();
4399
4400 // Reject shift amounts that are too small.
4401 if (ImmVal < Low)
4402 return false;
4403
4404 // Reject or saturate shift amounts that are too big.
4405 if (ImmVal > High) {
4406 if (!AllowSaturation)
4407 return false;
4408 ImmVal = High;
4409 }
4410
4411 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4412 return true;
4413 }
4414
4415 return false;
4416}
4417
4418bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4419 // tagp(FrameIndex, IRGstack, tag_offset):
4420 // since the offset between FrameIndex and IRGstack is a compile-time
4421 // constant, this can be lowered to a single ADDG instruction.
4422 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4423 return false;
4424 }
4425
4426 SDValue IRG_SP = N->getOperand(2);
4427 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4428 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4429 return false;
4430 }
4431
4432 const TargetLowering *TLI = getTargetLowering();
4433 SDLoc DL(N);
4434 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4435 SDValue FiOp = CurDAG->getTargetFrameIndex(
4436 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4437 int TagOffset = N->getConstantOperandVal(3);
4438
4439 SDNode *Out = CurDAG->getMachineNode(
4440 AArch64::TAGPstack, DL, MVT::i64,
4441 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4442 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4443 ReplaceNode(N, Out);
4444 return true;
4445}
4446
4447void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4448 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4449 "llvm.aarch64.tagp third argument must be an immediate");
4450 if (trySelectStackSlotTagP(N))
4451 return;
4452 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4453 // compile-time constant, not just for stack allocations.
4454
4455 // General case for unrelated pointers in Op1 and Op2.
4456 SDLoc DL(N);
4457 int TagOffset = N->getConstantOperandVal(3);
4458 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4459 {N->getOperand(1), N->getOperand(2)});
4460 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4461 {SDValue(N1, 0), N->getOperand(2)});
4462 SDNode *N3 = CurDAG->getMachineNode(
4463 AArch64::ADDG, DL, MVT::i64,
4464 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4465 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4466 ReplaceNode(N, N3);
4467}
4468
4469bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4470 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4471
4472 // Bail when not a "cast" like insert_subvector.
4473 if (N->getConstantOperandVal(2) != 0)
4474 return false;
4475 if (!N->getOperand(0).isUndef())
4476 return false;
4477
4478 // Bail when normal isel should do the job.
4479 EVT VT = N->getValueType(0);
4480 EVT InVT = N->getOperand(1).getValueType();
4481 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4482 return false;
4483 if (InVT.getSizeInBits() <= 128)
4484 return false;
4485
4486 // NOTE: We can only get here when doing fixed length SVE code generation.
4487 // We do manual selection because the types involved are not linked to real
4488 // registers (despite being legal) and must be coerced into SVE registers.
4489
4491 "Expected to insert into a packed scalable vector!");
4492
4493 SDLoc DL(N);
4494 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4495 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4496 N->getOperand(1), RC));
4497 return true;
4498}
4499
4500bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4501 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4502
4503 // Bail when not a "cast" like extract_subvector.
4504 if (N->getConstantOperandVal(1) != 0)
4505 return false;
4506
4507 // Bail when normal isel can do the job.
4508 EVT VT = N->getValueType(0);
4509 EVT InVT = N->getOperand(0).getValueType();
4510 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4511 return false;
4512 if (VT.getSizeInBits() <= 128)
4513 return false;
4514
4515 // NOTE: We can only get here when doing fixed length SVE code generation.
4516 // We do manual selection because the types involved are not linked to real
4517 // registers (despite being legal) and must be coerced into SVE registers.
4518
4520 "Expected to extract from a packed scalable vector!");
4521
4522 SDLoc DL(N);
4523 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4524 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4525 N->getOperand(0), RC));
4526 return true;
4527}
4528
4529bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4530 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4531
4532 SDValue N0 = N->getOperand(0);
4533 SDValue N1 = N->getOperand(1);
4534 EVT VT = N->getValueType(0);
4535
4536 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4537 // Rotate by a constant is a funnel shift in IR, which is expanded to
4538 // an OR with shifted operands.
4539 // We do the following transform:
4540 // OR N0, N1 -> xar (x, y, imm)
4541 // Where:
4542 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4543 // N0 = SHL_PRED true, V, splat(bits-imm)
4544 // V = (xor x, y)
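 // E.g., for nxv2i64 with a rotate amount of 17: N1 == srl(V, 17) and
 // N0 == shl(V, 47) (17 + 47 == 64), which selects to XAR_ZZZI_D,
 // i.e. "xar z0.d, z0.d, z1.d, #17".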
4545 if (VT.isScalableVector() &&
4546 (Subtarget->hasSVE2() ||
4547 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4548 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4549 N1.getOpcode() != AArch64ISD::SRL_PRED)
4550 std::swap(N0, N1);
4551 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4552 N1.getOpcode() != AArch64ISD::SRL_PRED)
4553 return false;
4554
4555 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4556 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4557 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4558 return false;
4559
4560 SDValue XOR = N0.getOperand(1);
4561 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4562 return false;
4563
4564 APInt ShlAmt, ShrAmt;
4565 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4566 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4567 return false;
4568
4569 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4570 return false;
4571
4572 SDLoc DL(N);
4573 SDValue Imm =
4574 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4575
4576 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4577 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4578 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4579 AArch64::XAR_ZZZI_D})) {
4580 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4581 return true;
4582 }
4583 return false;
4584 }
4585
4586 if (!Subtarget->hasSHA3())
4587 return false;
4588
4589 if (N0->getOpcode() != AArch64ISD::VSHL ||
4590 N1->getOpcode() != AArch64ISD::VLSHR)
4591 return false;
4592
4593 if (N0->getOperand(0) != N1->getOperand(0) ||
4594 N1->getOperand(0)->getOpcode() != ISD::XOR)
4595 return false;
4596
4597 SDValue XOR = N0.getOperand(0);
4598 SDValue R1 = XOR.getOperand(0);
4599 SDValue R2 = XOR.getOperand(1);
4600
4601 unsigned HsAmt = N0.getConstantOperandVal(1);
4602 unsigned ShAmt = N1.getConstantOperandVal(1);
4603
4604 SDLoc DL = SDLoc(N0.getOperand(1));
4605 SDValue Imm = CurDAG->getTargetConstant(
4606 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4607
4608 if (ShAmt + HsAmt != 64)
4609 return false;
4610
4611 SDValue Ops[] = {R1, R2, Imm};
4612 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4613
4614 return true;
4615}
4616
4617void AArch64DAGToDAGISel::Select(SDNode *Node) {
4618 // If we have a custom node, we already have selected!
4619 if (Node->isMachineOpcode()) {
4620 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4621 Node->setNodeId(-1);
4622 return;
4623 }
4624
4625 // Few custom selection stuff.
4626 EVT VT = Node->getValueType(0);
4627
4628 switch (Node->getOpcode()) {
4629 default:
4630 break;
4631
4632 case ISD::ATOMIC_CMP_SWAP:
4633 if (SelectCMP_SWAP(Node))
4634 return;
4635 break;
4636
4637 case ISD::READ_REGISTER:
4638 case AArch64ISD::MRRS:
4639 if (tryReadRegister(Node))
4640 return;
4641 break;
4642
4643 case ISD::WRITE_REGISTER:
4644 case AArch64ISD::MSRR:
4645 if (tryWriteRegister(Node))
4646 return;
4647 break;
4648
4649 case ISD::LOAD: {
4650 // Try to select as an indexed load. Fall through to normal processing
4651 // if we can't.
4652 if (tryIndexedLoad(Node))
4653 return;
4654 break;
4655 }
4656
4657 case ISD::SRL:
4658 case ISD::AND:
4659 case ISD::SRA:
4660 case ISD::SIGN_EXTEND_INREG:
4661 if (tryBitfieldExtractOp(Node))
4662 return;
4663 if (tryBitfieldInsertInZeroOp(Node))
4664 return;
4665 [[fallthrough]];
4666 case ISD::ROTR:
4667 case ISD::SHL:
4668 if (tryShiftAmountMod(Node))
4669 return;
4670 break;
4671
4672 case ISD::SIGN_EXTEND:
4673 if (tryBitfieldExtractOpFromSExt(Node))
4674 return;
4675 break;
4676
4677 case ISD::OR:
4678 if (tryBitfieldInsertOp(Node))
4679 return;
4680 if (trySelectXAR(Node))
4681 return;
4682 break;
4683
4684 case ISD::EXTRACT_SUBVECTOR: {
4685 if (trySelectCastScalableToFixedLengthVector(Node))
4686 return;
4687 break;
4688 }
4689
4690 case ISD::INSERT_SUBVECTOR: {
4691 if (trySelectCastFixedLengthToScalableVector(Node))
4692 return;
4693 break;
4694 }
4695
4696 case ISD::Constant: {
4697 // Materialize zero constants as copies from WZR/XZR. This allows
4698 // the coalescer to propagate these into other instructions.
4699 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4700 if (ConstNode->isZero()) {
4701 if (VT == MVT::i32) {
4702 SDValue New = CurDAG->getCopyFromReg(
4703 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4704 ReplaceNode(Node, New.getNode());
4705 return;
4706 } else if (VT == MVT::i64) {
4707 SDValue New = CurDAG->getCopyFromReg(
4708 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4709 ReplaceNode(Node, New.getNode());
4710 return;
4711 }
4712 }
4713 break;
4714 }
4715
4716 case ISD::FrameIndex: {
4717 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4718 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4719 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4720 const TargetLowering *TLI = getTargetLowering();
4721 SDValue TFI = CurDAG->getTargetFrameIndex(
4722 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4723 SDLoc DL(Node);
4724 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4725 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4726 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4727 return;
4728 }
4729 case ISD::INTRINSIC_W_CHAIN: {
4730 unsigned IntNo = Node->getConstantOperandVal(1);
4731 switch (IntNo) {
4732 default:
4733 break;
4734 case Intrinsic::aarch64_gcsss: {
4735 SDLoc DL(Node);
4736 SDValue Chain = Node->getOperand(0);
4737 SDValue Val = Node->getOperand(2);
4738 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4739 SDNode *SS1 =
4740 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4741 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4742 MVT::Other, Zero, SDValue(SS1, 0));
4743 ReplaceNode(Node, SS2);
4744 return;
4745 }
4746 case Intrinsic::aarch64_ldaxp:
4747 case Intrinsic::aarch64_ldxp: {
4748 unsigned Op =
4749 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4750 SDValue MemAddr = Node->getOperand(2);
4751 SDLoc DL(Node);
4752 SDValue Chain = Node->getOperand(0);
4753
4754 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4755 MVT::Other, MemAddr, Chain);
4756
4757 // Transfer memoperands.
4758 MachineMemOperand *MemOp =
4759 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4760 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4761 ReplaceNode(Node, Ld);
4762 return;
4763 }
4764 case Intrinsic::aarch64_stlxp:
4765 case Intrinsic::aarch64_stxp: {
4766 unsigned Op =
4767 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4768 SDLoc DL(Node);
4769 SDValue Chain = Node->getOperand(0);
4770 SDValue ValLo = Node->getOperand(2);
4771 SDValue ValHi = Node->getOperand(3);
4772 SDValue MemAddr = Node->getOperand(4);
4773
4774 // Place arguments in the right order.
4775 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4776
4777 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4778 // Transfer memoperands.
4779 MachineMemOperand *MemOp =
4780 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4781 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4782
4783 ReplaceNode(Node, St);
4784 return;
4785 }
4786 case Intrinsic::aarch64_neon_ld1x2:
4787 if (VT == MVT::v8i8) {
4788 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4789 return;
4790 } else if (VT == MVT::v16i8) {
4791 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4792 return;
4793 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4794 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4795 return;
4796 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4797 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4798 return;
4799 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4800 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4801 return;
4802 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4803 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4804 return;
4805 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4806 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4807 return;
4808 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4809 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4810 return;
4811 }
4812 break;
4813 case Intrinsic::aarch64_neon_ld1x3:
4814 if (VT == MVT::v8i8) {
4815 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4816 return;
4817 } else if (VT == MVT::v16i8) {
4818 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4819 return;
4820 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4821 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4822 return;
4823 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4824 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4825 return;
4826 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4827 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4828 return;
4829 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4830 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4831 return;
4832 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4833 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4834 return;
4835 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4836 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4837 return;
4838 }
4839 break;
4840 case Intrinsic::aarch64_neon_ld1x4:
4841 if (VT == MVT::v8i8) {
4842 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4843 return;
4844 } else if (VT == MVT::v16i8) {
4845 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4846 return;
4847 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4848 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4849 return;
4850 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4851 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4852 return;
4853 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4854 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4855 return;
4856 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4857 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4858 return;
4859 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4860 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4861 return;
4862 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4863 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4864 return;
4865 }
4866 break;
4867 case Intrinsic::aarch64_neon_ld2:
4868 if (VT == MVT::v8i8) {
4869 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4870 return;
4871 } else if (VT == MVT::v16i8) {
4872 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4873 return;
4874 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4875 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4876 return;
4877 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4878 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4879 return;
4880 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4881 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4882 return;
4883 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4884 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4885 return;
4886 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4887 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4888 return;
4889 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4890 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4891 return;
4892 }
4893 break;
4894 case Intrinsic::aarch64_neon_ld3:
4895 if (VT == MVT::v8i8) {
4896 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4897 return;
4898 } else if (VT == MVT::v16i8) {
4899 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4900 return;
4901 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4902 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4903 return;
4904 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4905 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4906 return;
4907 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4908 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4909 return;
4910 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4911 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4912 return;
4913 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4914 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4915 return;
4916 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4917 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4918 return;
4919 }
4920 break;
4921 case Intrinsic::aarch64_neon_ld4:
4922 if (VT == MVT::v8i8) {
4923 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4924 return;
4925 } else if (VT == MVT::v16i8) {
4926 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4927 return;
4928 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4929 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4930 return;
4931 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4932 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4933 return;
4934 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4935 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4936 return;
4937 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4938 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4939 return;
4940 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4941 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4942 return;
4943 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4944 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4945 return;
4946 }
4947 break;
4948 case Intrinsic::aarch64_neon_ld2r:
4949 if (VT == MVT::v8i8) {
4950 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4951 return;
4952 } else if (VT == MVT::v16i8) {
4953 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4954 return;
4955 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4956 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4957 return;
4958 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4959 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4960 return;
4961 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4962 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4963 return;
4964 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4965 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4966 return;
4967 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4968 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4969 return;
4970 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4971 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4972 return;
4973 }
4974 break;
4975 case Intrinsic::aarch64_neon_ld3r:
4976 if (VT == MVT::v8i8) {
4977 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4978 return;
4979 } else if (VT == MVT::v16i8) {
4980 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4981 return;
4982 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4983 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4984 return;
4985 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4986 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4987 return;
4988 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4989 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4990 return;
4991 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4992 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4993 return;
4994 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4995 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4996 return;
4997 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4998 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4999 return;
5000 }
5001 break;
5002 case Intrinsic::aarch64_neon_ld4r:
5003 if (VT == MVT::v8i8) {
5004 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5005 return;
5006 } else if (VT == MVT::v16i8) {
5007 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5008 return;
5009 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5010 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5011 return;
5012 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5013 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5014 return;
5015 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5016 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5017 return;
5018 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5019 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5020 return;
5021 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5022 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5023 return;
5024 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5025 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5026 return;
5027 }
5028 break;
5029 case Intrinsic::aarch64_neon_ld2lane:
5030 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5031 SelectLoadLane(Node, 2, AArch64::LD2i8);
5032 return;
5033 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5034 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5035 SelectLoadLane(Node, 2, AArch64::LD2i16);
5036 return;
5037 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5038 VT == MVT::v2f32) {
5039 SelectLoadLane(Node, 2, AArch64::LD2i32);
5040 return;
5041 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5042 VT == MVT::v1f64) {
5043 SelectLoadLane(Node, 2, AArch64::LD2i64);
5044 return;
5045 }
5046 break;
5047 case Intrinsic::aarch64_neon_ld3lane:
5048 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5049 SelectLoadLane(Node, 3, AArch64::LD3i8);
5050 return;
5051 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5052 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5053 SelectLoadLane(Node, 3, AArch64::LD3i16);
5054 return;
5055 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5056 VT == MVT::v2f32) {
5057 SelectLoadLane(Node, 3, AArch64::LD3i32);
5058 return;
5059 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5060 VT == MVT::v1f64) {
5061 SelectLoadLane(Node, 3, AArch64::LD3i64);
5062 return;
5063 }
5064 break;
5065 case Intrinsic::aarch64_neon_ld4lane:
5066 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5067 SelectLoadLane(Node, 4, AArch64::LD4i8);
5068 return;
5069 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5070 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5071 SelectLoadLane(Node, 4, AArch64::LD4i16);
5072 return;
5073 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5074 VT == MVT::v2f32) {
5075 SelectLoadLane(Node, 4, AArch64::LD4i32);
5076 return;
5077 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5078 VT == MVT::v1f64) {
5079 SelectLoadLane(Node, 4, AArch64::LD4i64);
5080 return;
5081 }
5082 break;
5083 case Intrinsic::aarch64_ld64b:
5084 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5085 return;
5086 case Intrinsic::aarch64_sve_ld2q_sret: {
5087 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5088 return;
5089 }
5090 case Intrinsic::aarch64_sve_ld3q_sret: {
5091 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5092 return;
5093 }
5094 case Intrinsic::aarch64_sve_ld4q_sret: {
5095 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5096 return;
5097 }
5098 case Intrinsic::aarch64_sve_ld2_sret: {
5099 if (VT == MVT::nxv16i8) {
5100 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5101 true);
5102 return;
5103 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5104 VT == MVT::nxv8bf16) {
5105 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5106 true);
5107 return;
5108 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5109 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5110 true);
5111 return;
5112 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5113 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5114 true);
5115 return;
5116 }
5117 break;
5118 }
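     // For the predicate-as-counter multi-vector loads below (ld1_pn_x2/x4 and
     // ldnt1_pn_x2/x4), SME2 targets select the *_PSEUDO opcodes (presumably so
     // the strided vs. contiguous register form can be settled later), SVE2p1
     // targets select the real instructions, and otherwise the case just breaks.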
5119 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5120 if (VT == MVT::nxv16i8) {
5121 if (Subtarget->hasSME2())
5122 SelectContiguousMultiVectorLoad(
5123 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5124 else if (Subtarget->hasSVE2p1())
5125 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5126 AArch64::LD1B_2Z);
5127 else
5128 break;
5129 return;
5130 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5131 VT == MVT::nxv8bf16) {
5132 if (Subtarget->hasSME2())
5133 SelectContiguousMultiVectorLoad(
5134 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5135 else if (Subtarget->hasSVE2p1())
5136 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5137 AArch64::LD1H_2Z);
5138 else
5139 break;
5140 return;
5141 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5142 if (Subtarget->hasSME2())
5143 SelectContiguousMultiVectorLoad(
5144 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5145 else if (Subtarget->hasSVE2p1())
5146 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5147 AArch64::LD1W_2Z);
5148 else
5149 break;
5150 return;
5151 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5152 if (Subtarget->hasSME2())
5153 SelectContiguousMultiVectorLoad(
5154 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5155 else if (Subtarget->hasSVE2p1())
5156 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5157 AArch64::LD1D_2Z);
5158 else
5159 break;
5160 return;
5161 }
5162 break;
5163 }
5164 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5165 if (VT == MVT::nxv16i8) {
5166 if (Subtarget->hasSME2())
5167 SelectContiguousMultiVectorLoad(
5168 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5169 else if (Subtarget->hasSVE2p1())
5170 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5171 AArch64::LD1B_4Z);
5172 else
5173 break;
5174 return;
5175 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5176 VT == MVT::nxv8bf16) {
5177 if (Subtarget->hasSME2())
5178 SelectContiguousMultiVectorLoad(
5179 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5180 else if (Subtarget->hasSVE2p1())
5181 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5182 AArch64::LD1H_4Z);
5183 else
5184 break;
5185 return;
5186 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5187 if (Subtarget->hasSME2())
5188 SelectContiguousMultiVectorLoad(
5189 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5190 else if (Subtarget->hasSVE2p1())
5191 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5192 AArch64::LD1W_4Z);
5193 else
5194 break;
5195 return;
5196 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5197 if (Subtarget->hasSME2())
5198 SelectContiguousMultiVectorLoad(
5199 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5200 else if (Subtarget->hasSVE2p1())
5201 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5202 AArch64::LD1D_4Z);
5203 else
5204 break;
5205 return;
5206 }
5207 break;
5208 }
5209 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5210 if (VT == MVT::nxv16i8) {
5211 if (Subtarget->hasSME2())
5212 SelectContiguousMultiVectorLoad(Node, 2, 0,
5213 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5214 AArch64::LDNT1B_2Z_PSEUDO);
5215 else if (Subtarget->hasSVE2p1())
5216 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5217 AArch64::LDNT1B_2Z);
5218 else
5219 break;
5220 return;
5221 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5222 VT == MVT::nxv8bf16) {
5223 if (Subtarget->hasSME2())
5224 SelectContiguousMultiVectorLoad(Node, 2, 1,
5225 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5226 AArch64::LDNT1H_2Z_PSEUDO);
5227 else if (Subtarget->hasSVE2p1())
5228 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5229 AArch64::LDNT1H_2Z);
5230 else
5231 break;
5232 return;
5233 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5234 if (Subtarget->hasSME2())
5235 SelectContiguousMultiVectorLoad(Node, 2, 2,
5236 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5237 AArch64::LDNT1W_2Z_PSEUDO);
5238 else if (Subtarget->hasSVE2p1())
5239 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5240 AArch64::LDNT1W_2Z);
5241 else
5242 break;
5243 return;
5244 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5245 if (Subtarget->hasSME2())
5246 SelectContiguousMultiVectorLoad(Node, 2, 3,
5247 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5248 AArch64::LDNT1D_2Z_PSEUDO);
5249 else if (Subtarget->hasSVE2p1())
5250 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5251 AArch64::LDNT1D_2Z);
5252 else
5253 break;
5254 return;
5255 }
5256 break;
5257 }
5258 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5259 if (VT == MVT::nxv16i8) {
5260 if (Subtarget->hasSME2())
5261 SelectContiguousMultiVectorLoad(Node, 4, 0,
5262 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5263 AArch64::LDNT1B_4Z_PSEUDO);
5264 else if (Subtarget->hasSVE2p1())
5265 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5266 AArch64::LDNT1B_4Z);
5267 else
5268 break;
5269 return;
5270 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5271 VT == MVT::nxv8bf16) {
5272 if (Subtarget->hasSME2())
5273 SelectContiguousMultiVectorLoad(Node, 4, 1,
5274 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5275 AArch64::LDNT1H_4Z_PSEUDO);
5276 else if (Subtarget->hasSVE2p1())
5277 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5278 AArch64::LDNT1H_4Z);
5279 else
5280 break;
5281 return;
5282 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5283 if (Subtarget->hasSME2())
5284 SelectContiguousMultiVectorLoad(Node, 4, 2,
5285 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5286 AArch64::LDNT1W_4Z_PSEUDO);
5287 else if (Subtarget->hasSVE2p1())
5288 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5289 AArch64::LDNT1W_4Z);
5290 else
5291 break;
5292 return;
5293 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5294 if (Subtarget->hasSME2())
5295 SelectContiguousMultiVectorLoad(Node, 4, 3,
5296 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5297 AArch64::LDNT1D_4Z_PSEUDO);
5298 else if (Subtarget->hasSVE2p1())
5299 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5300 AArch64::LDNT1D_4Z);
5301 else
5302 break;
5303 return;
5304 }
5305 break;
5306 }
5307 case Intrinsic::aarch64_sve_ld3_sret: {
5308 if (VT == MVT::nxv16i8) {
5309 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5310 true);
5311 return;
5312 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5313 VT == MVT::nxv8bf16) {
5314 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5315 true);
5316 return;
5317 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5318 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5319 true);
5320 return;
5321 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5322 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5323 true);
5324 return;
5325 }
5326 break;
5327 }
5328 case Intrinsic::aarch64_sve_ld4_sret: {
5329 if (VT == MVT::nxv16i8) {
5330 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5331 true);
5332 return;
5333 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5334 VT == MVT::nxv8bf16) {
5335 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5336 true);
5337 return;
5338 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5339 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5340 true);
5341 return;
5342 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5343 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5344 true);
5345 return;
5346 }
5347 break;
5348 }
5349 case Intrinsic::aarch64_sme_read_hor_vg2: {
5350 if (VT == MVT::nxv16i8) {
5351 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5352 AArch64::MOVA_2ZMXI_H_B);
5353 return;
5354 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5355 VT == MVT::nxv8bf16) {
5356 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5357 AArch64::MOVA_2ZMXI_H_H);
5358 return;
5359 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5360 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5361 AArch64::MOVA_2ZMXI_H_S);
5362 return;
5363 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5364 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5365 AArch64::MOVA_2ZMXI_H_D);
5366 return;
5367 }
5368 break;
5369 }
5370 case Intrinsic::aarch64_sme_read_ver_vg2: {
5371 if (VT == MVT::nxv16i8) {
5372 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5373 AArch64::MOVA_2ZMXI_V_B);
5374 return;
5375 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5376 VT == MVT::nxv8bf16) {
5377 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5378 AArch64::MOVA_2ZMXI_V_H);
5379 return;
5380 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5381 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5382 AArch64::MOVA_2ZMXI_V_S);
5383 return;
5384 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5385 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5386 AArch64::MOVA_2ZMXI_V_D);
5387 return;
5388 }
5389 break;
5390 }
5391 case Intrinsic::aarch64_sme_read_hor_vg4: {
5392 if (VT == MVT::nxv16i8) {
5393 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5394 AArch64::MOVA_4ZMXI_H_B);
5395 return;
5396 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5397 VT == MVT::nxv8bf16) {
5398 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5399 AArch64::MOVA_4ZMXI_H_H);
5400 return;
5401 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5402 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5403 AArch64::MOVA_4ZMXI_H_S);
5404 return;
5405 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5406 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5407 AArch64::MOVA_4ZMXI_H_D);
5408 return;
5409 }
5410 break;
5411 }
5412 case Intrinsic::aarch64_sme_read_ver_vg4: {
5413 if (VT == MVT::nxv16i8) {
5414 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5415 AArch64::MOVA_4ZMXI_V_B);
5416 return;
5417 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5418 VT == MVT::nxv8bf16) {
5419 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5420 AArch64::MOVA_4ZMXI_V_H);
5421 return;
5422 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5423 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5424 AArch64::MOVA_4ZMXI_V_S);
5425 return;
5426 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5427 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5428 AArch64::MOVA_4ZMXI_V_D);
5429 return;
5430 }
5431 break;
5432 }
5433 case Intrinsic::aarch64_sme_read_vg1x2: {
5434 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5435 AArch64::MOVA_VG2_2ZMXI);
5436 return;
5437 }
5438 case Intrinsic::aarch64_sme_read_vg1x4: {
5439 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5440 AArch64::MOVA_VG4_4ZMXI);
5441 return;
5442 }
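     // The readz variants below select MOVAZ pseudos: unlike the plain reads,
     // MOVAZ also zeroes the ZA slices it reads from.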
5443 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5444 if (VT == MVT::nxv16i8) {
5445 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5446 return;
5447 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5448 VT == MVT::nxv8bf16) {
5449 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5450 return;
5451 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5452 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5453 return;
5454 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5455 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5456 return;
5457 }
5458 break;
5459 }
5460 case Intrinsic::aarch64_sme_readz_vert_x2: {
5461 if (VT == MVT::nxv16i8) {
5462 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5463 return;
5464 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5465 VT == MVT::nxv8bf16) {
5466 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5467 return;
5468 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5469 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5470 return;
5471 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5472 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5473 return;
5474 }
5475 break;
5476 }
5477 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5478 if (VT == MVT::nxv16i8) {
5479 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5480 return;
5481 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5482 VT == MVT::nxv8bf16) {
5483 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5484 return;
5485 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5486 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5487 return;
5488 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5489 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5490 return;
5491 }
5492 break;
5493 }
5494 case Intrinsic::aarch64_sme_readz_vert_x4: {
5495 if (VT == MVT::nxv16i8) {
5496 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5497 return;
5498 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5499 VT == MVT::nxv8bf16) {
5500 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5501 return;
5502 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5503 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5504 return;
5505 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5506 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5507 return;
5508 }
5509 break;
5510 }
5511 case Intrinsic::aarch64_sme_readz_x2: {
5512 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5513 AArch64::ZA);
5514 return;
5515 }
5516 case Intrinsic::aarch64_sme_readz_x4: {
5517 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5518 AArch64::ZA);
5519 return;
5520 }
5521 case Intrinsic::swift_async_context_addr: {
5522 SDLoc DL(Node);
5523 SDValue Chain = Node->getOperand(0);
5524 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5525 SDValue Res = SDValue(
5526 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5527 CurDAG->getTargetConstant(8, DL, MVT::i32),
5528 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5529 0);
5530 ReplaceUses(SDValue(Node, 0), Res);
5531 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5532 CurDAG->RemoveDeadNode(Node);
5533
5534 auto &MF = CurDAG->getMachineFunction();
5535 MF.getFrameInfo().setFrameAddressIsTaken(true);
5536 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5537 return;
5538 }
5539 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5540 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5541 Node->getValueType(0),
5542 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5543 AArch64::LUTI2_4ZTZI_S}))
5544 // Second Immediate must be <= 3:
5545 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5546 return;
5547 }
5548 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5549 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5550 Node->getValueType(0),
5551 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5552 // Second Immediate must be <= 1:
5553 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5554 return;
5555 }
5556 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5557 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5558 Node->getValueType(0),
5559 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5560 AArch64::LUTI2_2ZTZI_S}))
5561 // Second Immediate must be <= 7:
5562 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5563 return;
5564 }
5565 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5566 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5567 Node->getValueType(0),
5568 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5569 AArch64::LUTI4_2ZTZI_S}))
5570 // Second Immediate must be <= 3:
5571 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5572 return;
5573 }
5574 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5575 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5576 return;
5577 }
5578 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5579 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5580 Node->getValueType(0),
5581 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5582 SelectCVTIntrinsicFP8(Node, 2, Opc);
5583 return;
5584 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5585 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5586 Node->getValueType(0),
5587 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5588 SelectCVTIntrinsicFP8(Node, 2, Opc);
5589 return;
5590 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5591 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5592 Node->getValueType(0),
5593 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5594 SelectCVTIntrinsicFP8(Node, 2, Opc);
5595 return;
5596 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5597 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5598 Node->getValueType(0),
5599 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5600 SelectCVTIntrinsicFP8(Node, 2, Opc);
5601 return;
5602 }
5603 } break;
5604 case ISD::INTRINSIC_WO_CHAIN: {
5605 unsigned IntNo = Node->getConstantOperandVal(0);
5606 switch (IntNo) {
5607 default:
5608 break;
5609 case Intrinsic::aarch64_tagp:
5610 SelectTagP(Node);
5611 return;
5612
5613 case Intrinsic::ptrauth_auth:
5614 SelectPtrauthAuth(Node);
5615 return;
5616
5617 case Intrinsic::ptrauth_resign:
5618 SelectPtrauthResign(Node);
5619 return;
5620
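     // The TBL/TBX selections below choose the 8-byte or 16-byte register form
     // from the result type; the trailing flag marks the TBX variants, which
     // keep the existing destination element when an index is out of range.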
5621 case Intrinsic::aarch64_neon_tbl2:
5622 SelectTable(Node, 2,
5623 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5624 false);
5625 return;
5626 case Intrinsic::aarch64_neon_tbl3:
5627 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5628 : AArch64::TBLv16i8Three,
5629 false);
5630 return;
5631 case Intrinsic::aarch64_neon_tbl4:
5632 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5633 : AArch64::TBLv16i8Four,
5634 false);
5635 return;
5636 case Intrinsic::aarch64_neon_tbx2:
5637 SelectTable(Node, 2,
5638 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5639 true);
5640 return;
5641 case Intrinsic::aarch64_neon_tbx3:
5642 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5643 : AArch64::TBXv16i8Three,
5644 true);
5645 return;
5646 case Intrinsic::aarch64_neon_tbx4:
5647 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5648 : AArch64::TBXv16i8Four,
5649 true);
5650 return;
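     // The multi-vector SVE2p1/SME2 intrinsics below use SelectOpcodeFromVT to
     // map the element type to a B/H/S/D opcode (a 0 entry marks an unsupported
     // element size) and then call SelectDestructiveMultiIntrinsic with the
     // tuple size and a flag for whether the second source is also a register
     // tuple (the "_single" variants pass false).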
5651 case Intrinsic::aarch64_sve_srshl_single_x2:
5652 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5653 Node->getValueType(0),
5654 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5655 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5656 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5657 return;
5658 case Intrinsic::aarch64_sve_srshl_single_x4:
5659 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5660 Node->getValueType(0),
5661 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5662 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5663 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5664 return;
5665 case Intrinsic::aarch64_sve_urshl_single_x2:
5666 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5667 Node->getValueType(0),
5668 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5669 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5670 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5671 return;
5672 case Intrinsic::aarch64_sve_urshl_single_x4:
5673 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5674 Node->getValueType(0),
5675 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5676 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5677 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5678 return;
5679 case Intrinsic::aarch64_sve_srshl_x2:
5680 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5681 Node->getValueType(0),
5682 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5683 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5684 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5685 return;
5686 case Intrinsic::aarch64_sve_srshl_x4:
5687 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5688 Node->getValueType(0),
5689 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5690 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5691 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5692 return;
5693 case Intrinsic::aarch64_sve_urshl_x2:
5694 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5695 Node->getValueType(0),
5696 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5697 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5698 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5699 return;
5700 case Intrinsic::aarch64_sve_urshl_x4:
5701 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5702 Node->getValueType(0),
5703 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5704 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5705 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5706 return;
5707 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5708 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5709 Node->getValueType(0),
5710 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5711 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5712 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5713 return;
5714 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5715 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5716 Node->getValueType(0),
5717 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5718 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5719 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5720 return;
5721 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5722 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5723 Node->getValueType(0),
5724 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5725 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5726 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5727 return;
5728 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5729 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5730 Node->getValueType(0),
5731 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5732 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5733 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5734 return;
5735 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5736 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5737 Node->getValueType(0),
5738 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5739 AArch64::FSCALE_2ZZ_D}))
5740 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5741 return;
5742 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5743 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5744 Node->getValueType(0),
5745 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5746 AArch64::FSCALE_4ZZ_D}))
5747 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5748 return;
5749 case Intrinsic::aarch64_sme_fp8_scale_x2:
5750 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5751 Node->getValueType(0),
5752 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5753 AArch64::FSCALE_2Z2Z_D}))
5754 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5755 return;
5756 case Intrinsic::aarch64_sme_fp8_scale_x4:
5757 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5758 Node->getValueType(0),
5759 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5760 AArch64::FSCALE_4Z4Z_D}))
5761 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5762 return;
5763 case Intrinsic::aarch64_sve_whilege_x2:
5764 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5765 Node->getValueType(0),
5766 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5767 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5768 SelectWhilePair(Node, Op);
5769 return;
5770 case Intrinsic::aarch64_sve_whilegt_x2:
5771 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5772 Node->getValueType(0),
5773 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5774 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5775 SelectWhilePair(Node, Op);
5776 return;
5777 case Intrinsic::aarch64_sve_whilehi_x2:
5778 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5779 Node->getValueType(0),
5780 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5781 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5782 SelectWhilePair(Node, Op);
5783 return;
5784 case Intrinsic::aarch64_sve_whilehs_x2:
5785 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5786 Node->getValueType(0),
5787 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5788 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5789 SelectWhilePair(Node, Op);
5790 return;
5791 case Intrinsic::aarch64_sve_whilele_x2:
5792 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5793 Node->getValueType(0),
5794 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5795 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5796 SelectWhilePair(Node, Op);
5797 return;
5798 case Intrinsic::aarch64_sve_whilelo_x2:
5799 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5800 Node->getValueType(0),
5801 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5802 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5803 SelectWhilePair(Node, Op);
5804 return;
5805 case Intrinsic::aarch64_sve_whilels_x2:
5806 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5807 Node->getValueType(0),
5808 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5809 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5810 SelectWhilePair(Node, Op);
5811 return;
5812 case Intrinsic::aarch64_sve_whilelt_x2:
5813 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5814 Node->getValueType(0),
5815 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5816 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5817 SelectWhilePair(Node, Op);
5818 return;
5819 case Intrinsic::aarch64_sve_smax_single_x2:
5820 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5821 Node->getValueType(0),
5822 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5823 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5824 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5825 return;
5826 case Intrinsic::aarch64_sve_umax_single_x2:
5827 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5828 Node->getValueType(0),
5829 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5830 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5831 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5832 return;
5833 case Intrinsic::aarch64_sve_fmax_single_x2:
5834 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5835 Node->getValueType(0),
5836 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5837 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5838 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5839 return;
5840 case Intrinsic::aarch64_sve_smax_single_x4:
5841 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5842 Node->getValueType(0),
5843 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5844 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5845 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5846 return;
5847 case Intrinsic::aarch64_sve_umax_single_x4:
5848 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5849 Node->getValueType(0),
5850 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5851 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5852 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5853 return;
5854 case Intrinsic::aarch64_sve_fmax_single_x4:
5855 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5856 Node->getValueType(0),
5857 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5858 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5859 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5860 return;
5861 case Intrinsic::aarch64_sve_smin_single_x2:
5862 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5863 Node->getValueType(0),
5864 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5865 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5866 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5867 return;
5868 case Intrinsic::aarch64_sve_umin_single_x2:
5869 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5870 Node->getValueType(0),
5871 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5872 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5873 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5874 return;
5875 case Intrinsic::aarch64_sve_fmin_single_x2:
5876 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5877 Node->getValueType(0),
5878 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5879 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5880 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5881 return;
5882 case Intrinsic::aarch64_sve_smin_single_x4:
5883 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5884 Node->getValueType(0),
5885 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5886 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5887 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5888 return;
5889 case Intrinsic::aarch64_sve_umin_single_x4:
5890 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5891 Node->getValueType(0),
5892 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5893 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5894 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5895 return;
5896 case Intrinsic::aarch64_sve_fmin_single_x4:
5897 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5898 Node->getValueType(0),
5899 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5900 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5901 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5902 return;
5903 case Intrinsic::aarch64_sve_smax_x2:
5904 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5905 Node->getValueType(0),
5906 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5907 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5908 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5909 return;
5910 case Intrinsic::aarch64_sve_umax_x2:
5911 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5912 Node->getValueType(0),
5913 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5914 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5915 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5916 return;
5917 case Intrinsic::aarch64_sve_fmax_x2:
5918 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5919 Node->getValueType(0),
5920 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5921 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5922 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5923 return;
5924 case Intrinsic::aarch64_sve_smax_x4:
5925 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5926 Node->getValueType(0),
5927 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5928 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5929 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5930 return;
5931 case Intrinsic::aarch64_sve_umax_x4:
5932 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5933 Node->getValueType(0),
5934 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5935 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5936 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5937 return;
5938 case Intrinsic::aarch64_sve_fmax_x4:
5939 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5940 Node->getValueType(0),
5941 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5942 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5943 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5944 return;
5945 case Intrinsic::aarch64_sme_famax_x2:
5946 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5947 Node->getValueType(0),
5948 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
5949 AArch64::FAMAX_2Z2Z_D}))
5950 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5951 return;
5952 case Intrinsic::aarch64_sme_famax_x4:
5953 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5954 Node->getValueType(0),
5955 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
5956 AArch64::FAMAX_4Z4Z_D}))
5957 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5958 return;
5959 case Intrinsic::aarch64_sme_famin_x2:
5960 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5961 Node->getValueType(0),
5962 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
5963 AArch64::FAMIN_2Z2Z_D}))
5964 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5965 return;
5966 case Intrinsic::aarch64_sme_famin_x4:
5967 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5968 Node->getValueType(0),
5969 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
5970 AArch64::FAMIN_4Z4Z_D}))
5971 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5972 return;
5973 case Intrinsic::aarch64_sve_smin_x2:
5974 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5975 Node->getValueType(0),
5976 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5977 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5978 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5979 return;
5980 case Intrinsic::aarch64_sve_umin_x2:
5981 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5982 Node->getValueType(0),
5983 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5984 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5985 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5986 return;
5987 case Intrinsic::aarch64_sve_fmin_x2:
5988 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5989 Node->getValueType(0),
5990 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5991 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5992 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5993 return;
5994 case Intrinsic::aarch64_sve_smin_x4:
5995 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5996 Node->getValueType(0),
5997 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5998 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5999 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6000 return;
6001 case Intrinsic::aarch64_sve_umin_x4:
6002 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6003 Node->getValueType(0),
6004 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6005 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6006 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6007 return;
6008 case Intrinsic::aarch64_sve_fmin_x4:
6009 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6010 Node->getValueType(0),
6011 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6012 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6013 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6014 return;
6015 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
6016 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6017 Node->getValueType(0),
6018 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6019 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6020 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6021 return;
6022 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
6023 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6024 Node->getValueType(0),
6025 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6026 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6027 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6028 return;
6029 case Intrinsic::aarch64_sve_fminnm_single_x2:
6030 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6031 Node->getValueType(0),
6032 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6033 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6034 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6035 return;
6036 case Intrinsic::aarch64_sve_fminnm_single_x4:
6037 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6038 Node->getValueType(0),
6039 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6040 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6041 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6042 return;
6043 case Intrinsic::aarch64_sve_fmaxnm_x2:
6044 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6045 Node->getValueType(0),
6046 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6047 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6048 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6049 return;
6050 case Intrinsic::aarch64_sve_fmaxnm_x4:
6051 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6052 Node->getValueType(0),
6053 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6054 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6055 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6056 return;
6057 case Intrinsic::aarch64_sve_fminnm_x2:
6058 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6059 Node->getValueType(0),
6060 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6061 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6062 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6063 return;
6064 case Intrinsic::aarch64_sve_fminnm_x4:
6065 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6066 Node->getValueType(0),
6067 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6068 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6069 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6070 return;
6071 case Intrinsic::aarch64_sve_fcvtzs_x2:
6072 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6073 return;
6074 case Intrinsic::aarch64_sve_scvtf_x2:
6075 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6076 return;
6077 case Intrinsic::aarch64_sve_fcvtzu_x2:
6078 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6079 return;
6080 case Intrinsic::aarch64_sve_ucvtf_x2:
6081 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6082 return;
6083 case Intrinsic::aarch64_sve_fcvtzs_x4:
6084 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6085 return;
6086 case Intrinsic::aarch64_sve_scvtf_x4:
6087 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6088 return;
6089 case Intrinsic::aarch64_sve_fcvtzu_x4:
6090 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6091 return;
6092 case Intrinsic::aarch64_sve_ucvtf_x4:
6093 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6094 return;
6095 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6096 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6097 return;
6098 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6099 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6100 return;
6101 case Intrinsic::aarch64_sve_sclamp_single_x2:
6102 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6103 Node->getValueType(0),
6104 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6105 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6106 SelectClamp(Node, 2, Op);
6107 return;
6108 case Intrinsic::aarch64_sve_uclamp_single_x2:
6109 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6110 Node->getValueType(0),
6111 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6112 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6113 SelectClamp(Node, 2, Op);
6114 return;
6115 case Intrinsic::aarch64_sve_fclamp_single_x2:
6116 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6117 Node->getValueType(0),
6118 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6119 AArch64::FCLAMP_VG2_2Z2Z_D}))
6120 SelectClamp(Node, 2, Op);
6121 return;
6122 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6123 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6124 return;
6125 case Intrinsic::aarch64_sve_sclamp_single_x4:
6126 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6127 Node->getValueType(0),
6128 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6129 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6130 SelectClamp(Node, 4, Op);
6131 return;
6132 case Intrinsic::aarch64_sve_uclamp_single_x4:
6133 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6134 Node->getValueType(0),
6135 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6136 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6137 SelectClamp(Node, 4, Op);
6138 return;
6139 case Intrinsic::aarch64_sve_fclamp_single_x4:
6140 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6141 Node->getValueType(0),
6142 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6143 AArch64::FCLAMP_VG4_4Z4Z_D}))
6144 SelectClamp(Node, 4, Op);
6145 return;
6146 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6147 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6148 return;
6149 case Intrinsic::aarch64_sve_add_single_x2:
6150 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6151 Node->getValueType(0),
6152 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6153 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6154 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6155 return;
6156 case Intrinsic::aarch64_sve_add_single_x4:
6157 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6158 Node->getValueType(0),
6159 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6160 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6161 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6162 return;
6163 case Intrinsic::aarch64_sve_zip_x2:
6164 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6165 Node->getValueType(0),
6166 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6167 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6168 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6169 return;
6170 case Intrinsic::aarch64_sve_zipq_x2:
6171 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6172 AArch64::ZIP_VG2_2ZZZ_Q);
6173 return;
6174 case Intrinsic::aarch64_sve_zip_x4:
6175 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6176 Node->getValueType(0),
6177 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6178 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6179 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_zipq_x4:
6182 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6183 AArch64::ZIP_VG4_4Z4Z_Q);
6184 return;
6185 case Intrinsic::aarch64_sve_uzp_x2:
6186 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6187 Node->getValueType(0),
6188 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6189 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6190 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6191 return;
6192 case Intrinsic::aarch64_sve_uzpq_x2:
6193 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6194 AArch64::UZP_VG2_2ZZZ_Q);
6195 return;
6196 case Intrinsic::aarch64_sve_uzp_x4:
6197 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6198 Node->getValueType(0),
6199 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6200 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6201 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6202 return;
6203 case Intrinsic::aarch64_sve_uzpq_x4:
6204 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6205 AArch64::UZP_VG4_4Z4Z_Q);
6206 return;
6207 case Intrinsic::aarch64_sve_sel_x2:
6208 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6209 Node->getValueType(0),
6210 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6211 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6212 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6213 return;
6214 case Intrinsic::aarch64_sve_sel_x4:
6215 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6216 Node->getValueType(0),
6217 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6218 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6219 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6220 return;
6221 case Intrinsic::aarch64_sve_frinta_x2:
6222 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6223 return;
6224 case Intrinsic::aarch64_sve_frinta_x4:
6225 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6226 return;
6227 case Intrinsic::aarch64_sve_frintm_x2:
6228 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6229 return;
6230 case Intrinsic::aarch64_sve_frintm_x4:
6231 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6232 return;
6233 case Intrinsic::aarch64_sve_frintn_x2:
6234 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6235 return;
6236 case Intrinsic::aarch64_sve_frintn_x4:
6237 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6238 return;
6239 case Intrinsic::aarch64_sve_frintp_x2:
6240 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6241 return;
6242 case Intrinsic::aarch64_sve_frintp_x4:
6243 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6244 return;
6245 case Intrinsic::aarch64_sve_sunpk_x2:
6246 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6247 Node->getValueType(0),
6248 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6249 AArch64::SUNPK_VG2_2ZZ_D}))
6250 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6251 return;
6252 case Intrinsic::aarch64_sve_uunpk_x2:
6253 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6254 Node->getValueType(0),
6255 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6256 AArch64::UUNPK_VG2_2ZZ_D}))
6257 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6258 return;
6259 case Intrinsic::aarch64_sve_sunpk_x4:
6260 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6261 Node->getValueType(0),
6262 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6263 AArch64::SUNPK_VG4_4Z2Z_D}))
6264 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6265 return;
6266 case Intrinsic::aarch64_sve_uunpk_x4:
6267 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6268 Node->getValueType(0),
6269 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6270 AArch64::UUNPK_VG4_4Z2Z_D}))
6271 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6272 return;
6273 case Intrinsic::aarch64_sve_pext_x2: {
6274 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6275 Node->getValueType(0),
6276 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6277 AArch64::PEXT_2PCI_D}))
6278 SelectPExtPair(Node, Op);
6279 return;
6280 }
6281 }
6282 break;
6283 }
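     // Void intrinsics carry a chain, so the intrinsic ID is operand 1 and the
     // value type used to pick the store opcode comes from the first data
     // operand when one is present.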
6284 case ISD::INTRINSIC_VOID: {
6285 unsigned IntNo = Node->getConstantOperandVal(1);
6286 if (Node->getNumOperands() >= 3)
6287 VT = Node->getOperand(2)->getValueType(0);
6288 switch (IntNo) {
6289 default:
6290 break;
6291 case Intrinsic::aarch64_neon_st1x2: {
6292 if (VT == MVT::v8i8) {
6293 SelectStore(Node, 2, AArch64::ST1Twov8b);
6294 return;
6295 } else if (VT == MVT::v16i8) {
6296 SelectStore(Node, 2, AArch64::ST1Twov16b);
6297 return;
6298 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6299 VT == MVT::v4bf16) {
6300 SelectStore(Node, 2, AArch64::ST1Twov4h);
6301 return;
6302 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6303 VT == MVT::v8bf16) {
6304 SelectStore(Node, 2, AArch64::ST1Twov8h);
6305 return;
6306 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6307 SelectStore(Node, 2, AArch64::ST1Twov2s);
6308 return;
6309 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6310 SelectStore(Node, 2, AArch64::ST1Twov4s);
6311 return;
6312 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6313 SelectStore(Node, 2, AArch64::ST1Twov2d);
6314 return;
6315 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6316 SelectStore(Node, 2, AArch64::ST1Twov1d);
6317 return;
6318 }
6319 break;
6320 }
6321 case Intrinsic::aarch64_neon_st1x3: {
6322 if (VT == MVT::v8i8) {
6323 SelectStore(Node, 3, AArch64::ST1Threev8b);
6324 return;
6325 } else if (VT == MVT::v16i8) {
6326 SelectStore(Node, 3, AArch64::ST1Threev16b);
6327 return;
6328 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6329 VT == MVT::v4bf16) {
6330 SelectStore(Node, 3, AArch64::ST1Threev4h);
6331 return;
6332 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6333 VT == MVT::v8bf16) {
6334 SelectStore(Node, 3, AArch64::ST1Threev8h);
6335 return;
6336 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6337 SelectStore(Node, 3, AArch64::ST1Threev2s);
6338 return;
6339 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6340 SelectStore(Node, 3, AArch64::ST1Threev4s);
6341 return;
6342 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6343 SelectStore(Node, 3, AArch64::ST1Threev2d);
6344 return;
6345 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6346 SelectStore(Node, 3, AArch64::ST1Threev1d);
6347 return;
6348 }
6349 break;
6350 }
6351 case Intrinsic::aarch64_neon_st1x4: {
6352 if (VT == MVT::v8i8) {
6353 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6354 return;
6355 } else if (VT == MVT::v16i8) {
6356 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6357 return;
6358 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6359 VT == MVT::v4bf16) {
6360 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6361 return;
6362 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6363 VT == MVT::v8bf16) {
6364 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6365 return;
6366 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6367 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6368 return;
6369 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6370 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6371 return;
6372 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6373 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6374 return;
6375 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6376 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6377 return;
6378 }
6379 break;
6380 }
6381 case Intrinsic::aarch64_neon_st2: {
6382 if (VT == MVT::v8i8) {
6383 SelectStore(Node, 2, AArch64::ST2Twov8b);
6384 return;
6385 } else if (VT == MVT::v16i8) {
6386 SelectStore(Node, 2, AArch64::ST2Twov16b);
6387 return;
6388 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6389 VT == MVT::v4bf16) {
6390 SelectStore(Node, 2, AArch64::ST2Twov4h);
6391 return;
6392 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6393 VT == MVT::v8bf16) {
6394 SelectStore(Node, 2, AArch64::ST2Twov8h);
6395 return;
6396 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6397 SelectStore(Node, 2, AArch64::ST2Twov2s);
6398 return;
6399 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6400 SelectStore(Node, 2, AArch64::ST2Twov4s);
6401 return;
6402 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6403 SelectStore(Node, 2, AArch64::ST2Twov2d);
6404 return;
6405 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6406 SelectStore(Node, 2, AArch64::ST1Twov1d);
6407 return;
6408 }
6409 break;
6410 }
6411 case Intrinsic::aarch64_neon_st3: {
6412 if (VT == MVT::v8i8) {
6413 SelectStore(Node, 3, AArch64::ST3Threev8b);
6414 return;
6415 } else if (VT == MVT::v16i8) {
6416 SelectStore(Node, 3, AArch64::ST3Threev16b);
6417 return;
6418 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6419 VT == MVT::v4bf16) {
6420 SelectStore(Node, 3, AArch64::ST3Threev4h);
6421 return;
6422 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6423 VT == MVT::v8bf16) {
6424 SelectStore(Node, 3, AArch64::ST3Threev8h);
6425 return;
6426 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6427 SelectStore(Node, 3, AArch64::ST3Threev2s);
6428 return;
6429 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6430 SelectStore(Node, 3, AArch64::ST3Threev4s);
6431 return;
6432 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6433 SelectStore(Node, 3, AArch64::ST3Threev2d);
6434 return;
6435 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6436 SelectStore(Node, 3, AArch64::ST1Threev1d);
6437 return;
6438 }
6439 break;
6440 }
6441 case Intrinsic::aarch64_neon_st4: {
6442 if (VT == MVT::v8i8) {
6443 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6444 return;
6445 } else if (VT == MVT::v16i8) {
6446 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6447 return;
6448 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6449 VT == MVT::v4bf16) {
6450 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6451 return;
6452 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6453 VT == MVT::v8bf16) {
6454 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6455 return;
6456 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6457 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6458 return;
6459 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6460 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6461 return;
6462 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6463 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6464 return;
6465 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6466 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6467 return;
6468 }
6469 break;
6470 }
6471 case Intrinsic::aarch64_neon_st2lane: {
6472 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6473 SelectStoreLane(Node, 2, AArch64::ST2i8);
6474 return;
6475 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6476 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6477 SelectStoreLane(Node, 2, AArch64::ST2i16);
6478 return;
6479 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6480 VT == MVT::v2f32) {
6481 SelectStoreLane(Node, 2, AArch64::ST2i32);
6482 return;
6483 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6484 VT == MVT::v1f64) {
6485 SelectStoreLane(Node, 2, AArch64::ST2i64);
6486 return;
6487 }
6488 break;
6489 }
6490 case Intrinsic::aarch64_neon_st3lane: {
6491 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6492 SelectStoreLane(Node, 3, AArch64::ST3i8);
6493 return;
6494 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6495 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6496 SelectStoreLane(Node, 3, AArch64::ST3i16);
6497 return;
6498 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6499 VT == MVT::v2f32) {
6500 SelectStoreLane(Node, 3, AArch64::ST3i32);
6501 return;
6502 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6503 VT == MVT::v1f64) {
6504 SelectStoreLane(Node, 3, AArch64::ST3i64);
6505 return;
6506 }
6507 break;
6508 }
6509 case Intrinsic::aarch64_neon_st4lane: {
6510 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6511 SelectStoreLane(Node, 4, AArch64::ST4i8);
6512 return;
6513 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6514 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6515 SelectStoreLane(Node, 4, AArch64::ST4i16);
6516 return;
6517 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6518 VT == MVT::v2f32) {
6519 SelectStoreLane(Node, 4, AArch64::ST4i32);
6520 return;
6521 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6522 VT == MVT::v1f64) {
6523 SelectStoreLane(Node, 4, AArch64::ST4i64);
6524 return;
6525 }
6526 break;
6527 }
6528 case Intrinsic::aarch64_sve_st2q: {
6529 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6530 return;
6531 }
6532 case Intrinsic::aarch64_sve_st3q: {
6533 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6534 return;
6535 }
6536 case Intrinsic::aarch64_sve_st4q: {
6537 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6538 return;
6539 }
6540 case Intrinsic::aarch64_sve_st2: {
6541 if (VT == MVT::nxv16i8) {
6542 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6543 return;
6544 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6545 VT == MVT::nxv8bf16) {
6546 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6547 return;
6548 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6549 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6550 return;
6551 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6552 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6553 return;
6554 }
6555 break;
6556 }
6557 case Intrinsic::aarch64_sve_st3: {
6558 if (VT == MVT::nxv16i8) {
6559 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6560 return;
6561 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6562 VT == MVT::nxv8bf16) {
6563 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6564 return;
6565 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6566 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6567 return;
6568 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6569 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6570 return;
6571 }
6572 break;
6573 }
6574 case Intrinsic::aarch64_sve_st4: {
6575 if (VT == MVT::nxv16i8) {
6576 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6577 return;
6578 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6579 VT == MVT::nxv8bf16) {
6580 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6581 return;
6582 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6583 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6584 return;
6585 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6586 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6587 return;
6588 }
6589 break;
6590 }
6591 }
6592 break;
6593 }
6594 case AArch64ISD::LD2post: {
6595 if (VT == MVT::v8i8) {
6596 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6597 return;
6598 } else if (VT == MVT::v16i8) {
6599 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6600 return;
6601 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6602 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6603 return;
6604 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6605 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6606 return;
6607 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6608 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6609 return;
6610 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6611 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6612 return;
6613 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6614 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6615 return;
6616 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6617 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6618 return;
6619 }
6620 break;
6621 }
6622 case AArch64ISD::LD3post: {
6623 if (VT == MVT::v8i8) {
6624 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6625 return;
6626 } else if (VT == MVT::v16i8) {
6627 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6628 return;
6629 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6630 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6631 return;
6632 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6633 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6634 return;
6635 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6636 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6637 return;
6638 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6639 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6640 return;
6641 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6642 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6643 return;
6644 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6645 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6646 return;
6647 }
6648 break;
6649 }
6650 case AArch64ISD::LD4post: {
6651 if (VT == MVT::v8i8) {
6652 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6653 return;
6654 } else if (VT == MVT::v16i8) {
6655 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6656 return;
6657 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6658 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6659 return;
6660 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6661 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6662 return;
6663 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6664 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6665 return;
6666 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6667 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6668 return;
6669 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6670 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6671 return;
6672 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6673 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6674 return;
6675 }
6676 break;
6677 }
6678 case AArch64ISD::LD1x2post: {
6679 if (VT == MVT::v8i8) {
6680 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6681 return;
6682 } else if (VT == MVT::v16i8) {
6683 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6684 return;
6685 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6686 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6687 return;
6688 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6689 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6690 return;
6691 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6692 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6693 return;
6694 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6695 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6696 return;
6697 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6698 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6699 return;
6700 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6701 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6702 return;
6703 }
6704 break;
6705 }
6706 case AArch64ISD::LD1x3post: {
6707 if (VT == MVT::v8i8) {
6708 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6709 return;
6710 } else if (VT == MVT::v16i8) {
6711 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6712 return;
6713 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6714 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6715 return;
6716 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6717 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6718 return;
6719 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6720 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6721 return;
6722 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6723 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6724 return;
6725 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6726 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6727 return;
6728 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6729 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6730 return;
6731 }
6732 break;
6733 }
6734 case AArch64ISD::LD1x4post: {
6735 if (VT == MVT::v8i8) {
6736 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6737 return;
6738 } else if (VT == MVT::v16i8) {
6739 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6740 return;
6741 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6742 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6743 return;
6744 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6745 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6746 return;
6747 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6748 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6749 return;
6750 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6751 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6752 return;
6753 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6754 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6755 return;
6756 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6757 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6758 return;
6759 }
6760 break;
6761 }
6762 case AArch64ISD::LD1DUPpost: {
6763 if (VT == MVT::v8i8) {
6764 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6765 return;
6766 } else if (VT == MVT::v16i8) {
6767 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6768 return;
6769 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6770 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6771 return;
6772 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6773 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6774 return;
6775 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6776 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6777 return;
6778 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6779 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6780 return;
6781 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6782 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6783 return;
6784 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6785 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6786 return;
6787 }
6788 break;
6789 }
6790 case AArch64ISD::LD2DUPpost: {
6791 if (VT == MVT::v8i8) {
6792 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6793 return;
6794 } else if (VT == MVT::v16i8) {
6795 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6796 return;
6797 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6798 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6799 return;
6800 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6801 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6802 return;
6803 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6804 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6805 return;
6806 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6807 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6808 return;
6809 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6810 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6811 return;
6812 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6813 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6814 return;
6815 }
6816 break;
6817 }
6818 case AArch64ISD::LD3DUPpost: {
6819 if (VT == MVT::v8i8) {
6820 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6821 return;
6822 } else if (VT == MVT::v16i8) {
6823 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6824 return;
6825 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6826 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6827 return;
6828 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6829 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6830 return;
6831 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6832 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6833 return;
6834 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6835 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6836 return;
6837 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6838 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6839 return;
6840 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6841 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6842 return;
6843 }
6844 break;
6845 }
6846 case AArch64ISD::LD4DUPpost: {
6847 if (VT == MVT::v8i8) {
6848 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6849 return;
6850 } else if (VT == MVT::v16i8) {
6851 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6852 return;
6853 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6854 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6855 return;
6856 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6857 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6858 return;
6859 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6860 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6861 return;
6862 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6863 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6864 return;
6865 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6866 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6867 return;
6868 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6869 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6870 return;
6871 }
6872 break;
6873 }
6874 case AArch64ISD::LD1LANEpost: {
6875 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6876 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6877 return;
6878 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6879 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6880 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6881 return;
6882 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6883 VT == MVT::v2f32) {
6884 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6885 return;
6886 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6887 VT == MVT::v1f64) {
6888 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6889 return;
6890 }
6891 break;
6892 }
6893 case AArch64ISD::LD2LANEpost: {
6894 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6895 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6896 return;
6897 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6898 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6899 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6900 return;
6901 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6902 VT == MVT::v2f32) {
6903 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6904 return;
6905 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6906 VT == MVT::v1f64) {
6907 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6908 return;
6909 }
6910 break;
6911 }
6912 case AArch64ISD::LD3LANEpost: {
6913 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6914 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6915 return;
6916 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6917 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6918 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6919 return;
6920 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6921 VT == MVT::v2f32) {
6922 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6923 return;
6924 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6925 VT == MVT::v1f64) {
6926 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6927 return;
6928 }
6929 break;
6930 }
6931 case AArch64ISD::LD4LANEpost: {
6932 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6933 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6934 return;
6935 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6936 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6937 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6938 return;
6939 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6940 VT == MVT::v2f32) {
6941 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6942 return;
6943 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6944 VT == MVT::v1f64) {
6945 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6946 return;
6947 }
6948 break;
6949 }
6950 case AArch64ISD::ST2post: {
6951 VT = Node->getOperand(1).getValueType();
6952 if (VT == MVT::v8i8) {
6953 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6954 return;
6955 } else if (VT == MVT::v16i8) {
6956 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6957 return;
6958 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6959 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6960 return;
6961 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6962 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6963 return;
6964 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6965 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6966 return;
6967 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6968 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6969 return;
6970 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6971 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6972 return;
6973 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6974 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6975 return;
6976 }
6977 break;
6978 }
6979 case AArch64ISD::ST3post: {
6980 VT = Node->getOperand(1).getValueType();
6981 if (VT == MVT::v8i8) {
6982 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6983 return;
6984 } else if (VT == MVT::v16i8) {
6985 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6986 return;
6987 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6988 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6989 return;
6990 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6991 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6992 return;
6993 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6994 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6995 return;
6996 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6997 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6998 return;
6999 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7000 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7001 return;
7002 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7003 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7004 return;
7005 }
7006 break;
7007 }
7008 case AArch64ISD::ST4post: {
7009 VT = Node->getOperand(1).getValueType();
7010 if (VT == MVT::v8i8) {
7011 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7012 return;
7013 } else if (VT == MVT::v16i8) {
7014 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7015 return;
7016 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7017 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7018 return;
7019 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7020 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7021 return;
7022 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7023 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7024 return;
7025 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7026 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7027 return;
7028 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7029 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7030 return;
7031 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7032 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7033 return;
7034 }
7035 break;
7036 }
7037 case AArch64ISD::ST1x2post: {
7038 VT = Node->getOperand(1).getValueType();
7039 if (VT == MVT::v8i8) {
7040 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7041 return;
7042 } else if (VT == MVT::v16i8) {
7043 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7044 return;
7045 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7046 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7047 return;
7048 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7049 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7050 return;
7051 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7052 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7053 return;
7054 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7055 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7056 return;
7057 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7058 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7059 return;
7060 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7061 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7062 return;
7063 }
7064 break;
7065 }
7066 case AArch64ISD::ST1x3post: {
7067 VT = Node->getOperand(1).getValueType();
7068 if (VT == MVT::v8i8) {
7069 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7070 return;
7071 } else if (VT == MVT::v16i8) {
7072 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7073 return;
7074 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7075 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7076 return;
7077 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7078 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7079 return;
7080 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7081 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7082 return;
7083 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7084 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7085 return;
7086 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7087 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7088 return;
7089 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7090 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7091 return;
7092 }
7093 break;
7094 }
7095 case AArch64ISD::ST1x4post: {
7096 VT = Node->getOperand(1).getValueType();
7097 if (VT == MVT::v8i8) {
7098 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7099 return;
7100 } else if (VT == MVT::v16i8) {
7101 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7102 return;
7103 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7104 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7105 return;
7106 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7107 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7108 return;
7109 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7110 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7111 return;
7112 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7113 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7114 return;
7115 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7116 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7117 return;
7118 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7119 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7120 return;
7121 }
7122 break;
7123 }
7124 case AArch64ISD::ST2LANEpost: {
7125 VT = Node->getOperand(1).getValueType();
7126 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7127 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7128 return;
7129 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7130 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7131 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7132 return;
7133 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7134 VT == MVT::v2f32) {
7135 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7136 return;
7137 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7138 VT == MVT::v1f64) {
7139 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7140 return;
7141 }
7142 break;
7143 }
7144 case AArch64ISD::ST3LANEpost: {
7145 VT = Node->getOperand(1).getValueType();
7146 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7147 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7148 return;
7149 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7150 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7151 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7152 return;
7153 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7154 VT == MVT::v2f32) {
7155 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7156 return;
7157 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7158 VT == MVT::v1f64) {
7159 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7160 return;
7161 }
7162 break;
7163 }
7164 case AArch64ISD::ST4LANEpost: {
7165 VT = Node->getOperand(1).getValueType();
7166 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7167 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7168 return;
7169 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7170 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7171 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7172 return;
7173 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7174 VT == MVT::v2f32) {
7175 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7176 return;
7177 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7178 VT == MVT::v1f64) {
7179 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7180 return;
7181 }
7182 break;
7183 }
7184 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7185 if (VT == MVT::nxv16i8) {
7186 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7187 return;
7188 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7189 VT == MVT::nxv8bf16) {
7190 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7191 return;
7192 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7193 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7194 return;
7195 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7196 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7197 return;
7198 }
7199 break;
7200 }
7201 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7202 if (VT == MVT::nxv16i8) {
7203 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7204 return;
7205 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7206 VT == MVT::nxv8bf16) {
7207 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7208 return;
7209 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7210 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7211 return;
7212 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7213 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7214 return;
7215 }
7216 break;
7217 }
7218 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7219 if (VT == MVT::nxv16i8) {
7220 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7221 return;
7222 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7223 VT == MVT::nxv8bf16) {
7224 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7225 return;
7226 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7227 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7228 return;
7229 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7230 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7231 return;
7232 }
7233 break;
7234 }
7235 }
7236
7237 // Select the default instruction
7238 SelectCode(Node);
7239}
7240
7241/// createAArch64ISelDag - This pass converts a legalized DAG into a
7242/// AArch64-specific DAG, ready for instruction scheduling.
7243FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7244 CodeGenOptLevel OptLevel) {
7245 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7246}
7247
7248/// When \p PredVT is a scalable vector predicate in the form
7249/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7250/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7251/// structured vectors (NumVec > 1), the output data type is
7252/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7253/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7254/// EVT.
7255static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7256 unsigned NumVec) {
7257 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7258 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7259 return EVT();
7260
7261 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7262 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7263 return EVT();
7264
7265 ElementCount EC = PredVT.getVectorElementCount();
7266 EVT ScalarVT =
7267 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7268 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7269
7270 return MemVT;
7271}
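// Editor's note (illustrative, not part of the upstream file): a minimal
// sketch of the mapping above, assuming an LLVMContext named `Ctx` is in
// scope.
//   EVT MemVT = getPackedVectorTypeFromPredicateType(Ctx, MVT::nxv4i1,
//                                                    /*NumVec=*/2);
//   // nxv4i1 -> 128 / 4 = 32-bit elements; 4 x 2 lanes => MemVT == nxv8i32.
// Predicate types outside {nxv16i1, nxv8i1, nxv4i1, nxv2i1} yield EVT().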
7272
7273/// Return the EVT of the data associated with a memory operation in \p
7274/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
7275static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7276 if (isa<MemSDNode>(Root))
7277 return cast<MemSDNode>(Root)->getMemoryVT();
7278
7279 if (isa<MemIntrinsicSDNode>(Root))
7280 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7281
7282 const unsigned Opcode = Root->getOpcode();
7283 // For custom ISD nodes, we have to look at them individually to extract the
7284 // type of the data moved to/from memory.
7285 switch (Opcode) {
7286 case AArch64ISD::LD1_MERGE_ZERO:
7287 case AArch64ISD::LD1S_MERGE_ZERO:
7288 case AArch64ISD::LDNF1_MERGE_ZERO:
7289 case AArch64ISD::LDNF1S_MERGE_ZERO:
7290 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7291 case AArch64ISD::ST1_PRED:
7292 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7293 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7294 return getPackedVectorTypeFromPredicateType(
7295 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7296 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7297 return getPackedVectorTypeFromPredicateType(
7298 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7299 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7300 return getPackedVectorTypeFromPredicateType(
7301 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7302 default:
7303 break;
7304 }
7305
7306 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7307 return EVT();
7308
7309 switch (Root->getConstantOperandVal(1)) {
7310 default:
7311 return EVT();
7312 case Intrinsic::aarch64_sme_ldr:
7313 case Intrinsic::aarch64_sme_str:
7314 return MVT::nxv16i8;
7315 case Intrinsic::aarch64_sve_prf:
7316 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7317 // width of the predicate.
7318 return getPackedVectorTypeFromPredicateType(
7319 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7320 case Intrinsic::aarch64_sve_ld2_sret:
7321 case Intrinsic::aarch64_sve_ld2q_sret:
7322 return getPackedVectorTypeFromPredicateType(
7323 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7324 case Intrinsic::aarch64_sve_st2q:
7325 return getPackedVectorTypeFromPredicateType(
7326 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7327 case Intrinsic::aarch64_sve_ld3_sret:
7328 case Intrinsic::aarch64_sve_ld3q_sret:
7329 return getPackedVectorTypeFromPredicateType(
7330 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7331 case Intrinsic::aarch64_sve_st3q:
7332 return getPackedVectorTypeFromPredicateType(
7333 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7334 case Intrinsic::aarch64_sve_ld4_sret:
7335 case Intrinsic::aarch64_sve_ld4q_sret:
7336 return getPackedVectorTypeFromPredicateType(
7337 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7338 case Intrinsic::aarch64_sve_st4q:
7339 return getPackedVectorTypeFromPredicateType(
7340 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7341 case Intrinsic::aarch64_sve_ld1udq:
7342 case Intrinsic::aarch64_sve_st1dq:
7343 return EVT(MVT::nxv1i64);
7344 case Intrinsic::aarch64_sve_ld1uwq:
7345 case Intrinsic::aarch64_sve_st1wq:
7346 return EVT(MVT::nxv1i32);
7347 }
7348}
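// Editor's note (illustrative, not part of the upstream file): for an
// INTRINSIC_W_CHAIN node wrapping Intrinsic::aarch64_sve_ld2_sret whose
// governing predicate (operand 2) has type nxv16i1, the switch above returns
// getPackedVectorTypeFromPredicateType(Ctx, nxv16i1, /*NumVec=*/2), i.e.
// nxv32i8. Plain memory nodes simply report MemSDNode::getMemoryVT().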
7349
7350/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7351/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7352/// where Root is the memory access using N for its address.
7353template <int64_t Min, int64_t Max>
7354bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7355 SDValue &Base,
7356 SDValue &OffImm) {
7357 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7358 const DataLayout &DL = CurDAG->getDataLayout();
7359 const MachineFrameInfo &MFI = MF->getFrameInfo();
7360
7361 if (N.getOpcode() == ISD::FrameIndex) {
7362 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7363 // We can only encode VL scaled offsets, so only fold in frame indexes
7364 // referencing SVE objects.
7365 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7366 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7367 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7368 return true;
7369 }
7370
7371 return false;
7372 }
7373
7374 if (MemVT == EVT())
7375 return false;
7376
7377 if (N.getOpcode() != ISD::ADD)
7378 return false;
7379
7380 SDValue VScale = N.getOperand(1);
7381 if (VScale.getOpcode() != ISD::VSCALE)
7382 return false;
7383
7384 TypeSize TS = MemVT.getSizeInBits();
7385 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7386 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7387
7388 if ((MulImm % MemWidthBytes) != 0)
7389 return false;
7390
7391 int64_t Offset = MulImm / MemWidthBytes;
7392 if (Offset < Min || Offset > Max)
7393 return false;
7394
7395 Base = N.getOperand(0);
7396 if (Base.getOpcode() == ISD::FrameIndex) {
7397 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7398 // We can only encode VL scaled offsets, so only fold in frame indexes
7399 // referencing SVE objects.
7400 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7401 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7402 }
7403
7404 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7405 return true;
7406}
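// Editor's note (illustrative, not part of the upstream file; %base is a
// placeholder value): a worked match for the pattern above. With
// MemVT == nxv4i32 (16 bytes per vector-length block) and
// N == (add %base, (vscale 32)):
//   MemWidthBytes = 16, MulImm = 32, Offset = 32 / 16 = 2
// so provided Min <= 2 <= Max, the fold produces Base = %base, OffImm = 2.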
7407
7408/// Select register plus register addressing mode for SVE, with scaled
7409/// offset.
7410bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7411 SDValue &Base,
7412 SDValue &Offset) {
7413 if (N.getOpcode() != ISD::ADD)
7414 return false;
7415
7416 // Process an ADD node.
7417 const SDValue LHS = N.getOperand(0);
7418 const SDValue RHS = N.getOperand(1);
7419
7420 // 8 bit data does not come with the SHL node, so it is treated
7421 // separately.
7422 if (Scale == 0) {
7423 Base = LHS;
7424 Offset = RHS;
7425 return true;
7426 }
7427
7428 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7429 int64_t ImmOff = C->getSExtValue();
7430 unsigned Size = 1 << Scale;
7431
7432 // To use the reg+reg addressing mode, the immediate must be a multiple of
7433 // the vector element's byte size.
7434 if (ImmOff % Size)
7435 return false;
7436
7437 SDLoc DL(N);
7438 Base = LHS;
7439 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7440 SDValue Ops[] = {Offset};
7441 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7442 Offset = SDValue(MI, 0);
7443 return true;
7444 }
7445
7446 // Check if the RHS is a shift node with a constant.
7447 if (RHS.getOpcode() != ISD::SHL)
7448 return false;
7449
7450 const SDValue ShiftRHS = RHS.getOperand(1);
7451 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7452 if (C->getZExtValue() == Scale) {
7453 Base = LHS;
7454 Offset = RHS.getOperand(0);
7455 return true;
7456 }
7457
7458 return false;
7459}
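// Editor's note (illustrative, not part of the upstream file; %base and %idx
// are placeholder values). For Scale == 2 (32-bit elements):
//   (add %base, (shl %idx, 2))  ->  Base = %base, Offset = %idx
//   (add %base, 24)             ->  Base = %base, Offset = MOVi64imm 6
// A constant offset is only folded when it is a multiple of the element size
// (1 << Scale) and is materialised pre-scaled (24 >> 2 == 6), since the
// reg+reg form scales the index register by the element size.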
7460
7461bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7462 const AArch64TargetLowering *TLI =
7463 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7464
7465 return TLI->isAllActivePredicate(*CurDAG, N);
7466}
7467
7468bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7469 EVT VT = N.getValueType();
7470 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7471}
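// Editor's note (illustrative): SelectAnyPredicate accepts any scalable i1
// vector, e.g. nxv16i1 or nxv2i1, and rejects data vectors such as nxv4i32
// or fixed-width types such as v16i8.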
7472
7473bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7474 SDValue &Base, SDValue &Offset,
7475 unsigned Scale) {
7476 // Try to untangle an ADD node into a 'reg + offset'
7477 if (CurDAG->isBaseWithConstantOffset(N))
7478 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7479 int64_t ImmOff = C->getSExtValue();
7480 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7481 Base = N.getOperand(0);
7482 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7483 return true;
7484 }
7485 }
7486
7487 // By default, just match reg + 0.
7488 Base = N;
7489 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7490 return true;
7491}
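// Editor's note (illustrative, not part of the upstream file; %base is a
// placeholder value). For MaxSize == 14 and Scale == 2:
//   (add %base, 6)  ->  Base = %base, Offset = 3           (6 <= 14, 6 % 2 == 0)
//   (add %base, 5)  ->  Base = (add %base, 5), Offset = 0  (reg + 0 fallback)
// The fallback always succeeds, matching the unconditional `return true`.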
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:204
support::ulittle16_t & Hi
Definition: aarch32.cpp:203
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1484
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1434
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
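The AArch64_AM helpers above deal with two immediate encodings that come up constantly in this selector: the N:immr:imms "logical immediate" form used by AND/ORR/EOR, and the shift-type-plus-amount field used by shifted operands. The following is only a sketch; it assumes it is compiled inside the AArch64 backend, where MCTargetDesc/AArch64AddressingModes.h is available.
#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
void logicalImmediateDemo() {
  // 0x00FF00FF00FF00FF is a repeating 16-bit pattern of 8 set bits, so it is
  // encodable as a logical immediate; all-ones famously is not.
  uint64_t Good = 0x00FF00FF00FF00FFULL;
  assert(AArch64_AM::isLogicalImmediate(Good, 64));
  assert(!AArch64_AM::isLogicalImmediate(~0ULL, 64));
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(Good, 64, Encoding)) {
    // Round trip: decoding the N:immr:imms encoding gives back the value.
    assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Good);
  }
  // A shifter operand packs a shift type and a 6-bit amount into one field.
  unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  assert(AArch64_AM::getShiftType(ShiftImm) == AArch64_AM::LSL);
  assert(AArch64_AM::getShiftValue(ShiftImm) == 12);
}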
static constexpr unsigned SVEBitsPerBlock
@ C
The default LLVM calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
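Much of the Select() logic above is a dispatch over these ISD opcodes, and some of them carry their key information in a specific operand: target intrinsics put the intrinsic ID in operand 0 (or operand 1 when there is a chain), and SIGN_EXTEND_INREG records the narrow "from" type as a VT operand. The helpers below are invented for illustration and are not part of this file.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include <cassert>
using namespace llvm;
// For RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, ...) the ID is operand 0.
bool isWhileloIntrinsic(const SDNode *N) {
  return N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
         N->getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo;
}
// SIGN_EXTEND_INREG(Val, VT) remembers the type being extended from.
EVT getSextInregFromType(const SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
  return cast<VTSDNode>(N->getOperand(1))->getVT();
}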
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:280
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:347
int countr_zero(T Val)
Count the number of 0s starting from the least significant bit, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:286
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:341
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:274
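These bit-manipulation helpers are what turn an AND mask into bitfield-extract-style field positions: countr_zero gives the field's starting bit, countr_one of the shifted-down mask gives its width, and isShiftedMask_64 guards that the mask really is one contiguous run of ones. A small self-contained sketch (the struct is invented for illustration):
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <optional>
using namespace llvm;
struct BitfieldPos {
  unsigned Lsb;   // lowest set bit of the mask
  unsigned Width; // number of contiguous set bits
};
// Decompose a contiguous mask such as 0xFF0 into (Lsb=4, Width=8).
std::optional<BitfieldPos> decomposeMask(uint64_t Mask) {
  if (!isShiftedMask_64(Mask))
    return std::nullopt;
  unsigned Lsb = countr_zero(Mask);
  unsigned Width = countr_one(Mask >> Lsb);
  return BitfieldPos{Lsb, Width};
}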
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
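EVT is how the selector reasons about result types, and the fixed/scalable split matters throughout the SVE paths above: for a scalable type, getSizeInBits() is only a known minimum, scaled by vscale at run time. A hedged standalone example, using 128 bits as the minimum SVE block size:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;
int main() {
  LLVMContext Ctx;
  // A fixed-length <4 x i32> is exactly 128 bits wide.
  EVT Fixed = EVT::getVectorVT(Ctx, MVT::i32, 4);
  assert(Fixed.isFixedLengthVector() && Fixed.getFixedSizeInBits() == 128);
  // A scalable <vscale x 4 x i32> is *at least* 128 bits (one SVE block).
  EVT Scalable = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true);
  assert(Scalable.isScalableVector());
  assert(Scalable.getSizeInBits().getKnownMinValue() == 128);
  assert(Scalable.getVectorMinNumElements() == 4);
  assert(Scalable.getScalarSizeInBits() == 32);
  return 0;
}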
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43