1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
 60    return SelectionDAGISel::runOnMachineFunction(MF);
 61  }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
 67  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
 68                                    InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 1, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 2, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 4, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 8, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 16, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
134 }
135 template <unsigned Size, unsigned Max>
136 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
137 // Test if there is an appropriate addressing mode and check if the
138 // immediate fits.
139 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
140 if (Found) {
141 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
142 int64_t C = CI->getSExtValue();
143 if (C <= Max)
144 return true;
145 }
146 }
147
148 // Otherwise, base only, materialize address in register.
149 Base = N;
150 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
151 return true;
152 }
153
154 template<int Width>
155 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
156 SDValue &SignExtend, SDValue &DoShift) {
157 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
158 }
159
160 template<int Width>
161 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
162 SDValue &SignExtend, SDValue &DoShift) {
163 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
164 }
165
166 bool SelectExtractHigh(SDValue N, SDValue &Res) {
167 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
168 N = N->getOperand(0);
169 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
170 !isa<ConstantSDNode>(N->getOperand(1)))
171 return false;
172 EVT VT = N->getValueType(0);
173 EVT LVT = N->getOperand(0).getValueType();
174 unsigned Index = N->getConstantOperandVal(1);
175 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
176 Index != VT.getVectorNumElements())
177 return false;
178 Res = N->getOperand(0);
179 return true;
180 }
181
182 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
183 if (N.getOpcode() != AArch64ISD::VLSHR)
184 return false;
185 SDValue Op = N->getOperand(0);
186 EVT VT = Op.getValueType();
187 unsigned ShtAmt = N->getConstantOperandVal(1);
188 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
189 return false;
190
191 APInt Imm;
192 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0)
195 << Op.getOperand(1).getConstantOperandVal(1));
196 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
197 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0));
200 else
201 return false;
202
203 if (Imm != 1ULL << (ShtAmt - 1))
204 return false;
205
206 Res1 = Op.getOperand(0);
207 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
208 return true;
209 }
210
211 bool SelectDupZeroOrUndef(SDValue N) {
212 switch(N->getOpcode()) {
213 case ISD::UNDEF:
214 return true;
215 case AArch64ISD::DUP:
216 case ISD::SPLAT_VECTOR: {
217 auto Opnd0 = N->getOperand(0);
218 if (isNullConstant(Opnd0))
219 return true;
220 if (isNullFPConstant(Opnd0))
221 return true;
222 break;
223 }
224 default:
225 break;
226 }
227
228 return false;
229 }
230
231 bool SelectDupZero(SDValue N) {
232 switch(N->getOpcode()) {
233 case AArch64ISD::DUP:
234 case ISD::SPLAT_VECTOR: {
235 auto Opnd0 = N->getOperand(0);
236 if (isNullConstant(Opnd0))
237 return true;
238 if (isNullFPConstant(Opnd0))
239 return true;
240 break;
241 }
242 }
243
244 return false;
245 }
246
247 template<MVT::SimpleValueType VT>
248 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
249 return SelectSVEAddSubImm(N, VT, Imm, Shift);
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT>
258 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVECpyDupImm(N, VT, Imm, Shift);
260 }
261
262 template <MVT::SimpleValueType VT, bool Invert = false>
263 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
264 return SelectSVELogicalImm(N, VT, Imm, Invert);
265 }
266
267 template <MVT::SimpleValueType VT>
268 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
269 return SelectSVEArithImm(N, VT, Imm);
270 }
271
272 template <unsigned Low, unsigned High, bool AllowSaturation = false>
273 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
274 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
275 }
276
277 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
278 if (N->getOpcode() != ISD::SPLAT_VECTOR)
279 return false;
280
281 EVT EltVT = N->getValueType(0).getVectorElementType();
282 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
283 /* High */ EltVT.getFixedSizeInBits(),
284 /* AllowSaturation */ true, Imm);
285 }
286
287 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
288 template<signed Min, signed Max, signed Scale, bool Shift>
289 bool SelectCntImm(SDValue N, SDValue &Imm) {
290 if (!isa<ConstantSDNode>(N))
291 return false;
292
293 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
294 if (Shift)
295 MulImm = 1LL << MulImm;
296
297 if ((MulImm % std::abs(Scale)) != 0)
298 return false;
299
300 MulImm /= Scale;
301 if ((MulImm >= Min) && (MulImm <= Max)) {
302 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
303 return true;
304 }
305
306 return false;
307 }
308
309 template <signed Max, signed Scale>
310 bool SelectEXTImm(SDValue N, SDValue &Imm) {
311 if (!isa<ConstantSDNode>(N))
312 return false;
313
314 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
315
316 if (MulImm >= 0 && MulImm <= Max) {
317 MulImm *= Scale;
318 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
319 return true;
320 }
321
322 return false;
323 }
324
325 template <unsigned BaseReg, unsigned Max>
326 bool ImmToReg(SDValue N, SDValue &Imm) {
327 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
328 uint64_t C = CI->getZExtValue();
329
330 if (C > Max)
331 return false;
332
333 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
334 return true;
335 }
336 return false;
337 }
338
339 /// Form sequences of consecutive 64/128-bit registers for use in NEON
340 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
 341  /// between 1 and 4 elements. If it contains a single element, that element
 342  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
 343  SDValue createDTuple(ArrayRef<SDValue> Vecs);
 344  SDValue createQTuple(ArrayRef<SDValue> Vecs);
345 // Form a sequence of SVE registers for instructions using list of vectors,
346 // e.g. structured loads and stores (ldN, stN).
347 SDValue createZTuple(ArrayRef<SDValue> Vecs);
348
349 // Similar to above, except the register must start at a multiple of the
350 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
351 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
352
353 /// Generic helper for the createDTuple/createQTuple
354 /// functions. Those should almost always be called instead.
355 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
356 const unsigned SubRegs[]);
357
358 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
359
360 bool tryIndexedLoad(SDNode *N);
361
362 void SelectPtrauthAuth(SDNode *N);
363 void SelectPtrauthResign(SDNode *N);
364
365 bool trySelectStackSlotTagP(SDNode *N);
366 void SelectTagP(SDNode *N);
367
368 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
369 unsigned SubRegIdx);
370 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
371 unsigned SubRegIdx);
372 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
373 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
374 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
375 unsigned Opc_rr, unsigned Opc_ri,
376 bool IsIntr = false);
377 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
378 unsigned Scale, unsigned Opc_ri,
379 unsigned Opc_rr);
380 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
381 bool IsZmMulti, unsigned Opcode,
382 bool HasPred = false);
383 void SelectPExtPair(SDNode *N, unsigned Opc);
384 void SelectWhilePair(SDNode *N, unsigned Opc);
385 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
386 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
387 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
388 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
389 bool IsTupleInput, unsigned Opc);
390 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
391
392 template <unsigned MaxIdx, unsigned Scale>
393 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
394 unsigned Op);
395 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
396 unsigned Op, unsigned MaxIdx, unsigned Scale,
397 unsigned BaseReg = 0);
398 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
399 /// SVE Reg+Imm addressing mode.
400 template <int64_t Min, int64_t Max>
401 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
402 SDValue &OffImm);
403 /// SVE Reg+Reg address mode.
404 template <unsigned Scale>
405 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
406 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
407 }
408
409 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
410 unsigned Opc, uint32_t MaxImm);
411
412 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
413
414 template <unsigned MaxIdx, unsigned Scale>
415 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
416 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
417 }
418
419 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
420 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
424 unsigned Opc_rr, unsigned Opc_ri);
425 std::tuple<unsigned, SDValue, SDValue>
426 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
427 const SDValue &OldBase, const SDValue &OldOffset,
428 unsigned Scale);
429
430 bool tryBitfieldExtractOp(SDNode *N);
431 bool tryBitfieldExtractOpFromSExt(SDNode *N);
432 bool tryBitfieldInsertOp(SDNode *N);
433 bool tryBitfieldInsertInZeroOp(SDNode *N);
434 bool tryShiftAmountMod(SDNode *N);
435
436 bool tryReadRegister(SDNode *N);
437 bool tryWriteRegister(SDNode *N);
438
439 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
440 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
441
442 bool trySelectXAR(SDNode *N);
443
444// Include the pieces autogenerated from the target description.
445#include "AArch64GenDAGISel.inc"
446
447private:
448 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
449 SDValue &Shift);
450 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
451 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
452 SDValue &OffImm) {
453 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
454 }
455 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
456 unsigned Size, SDValue &Base,
457 SDValue &OffImm);
458 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &Offset, SDValue &SignExtend,
464 SDValue &DoShift);
465 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &Offset, SDValue &SignExtend,
467 SDValue &DoShift);
468 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
469 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
470 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
471 SDValue &Offset, SDValue &SignExtend);
472
473 template<unsigned RegWidth>
474 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
475 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
476 }
477
478 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
479
480 template<unsigned RegWidth>
481 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
482 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
483 }
484
485 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
486 unsigned Width);
487
488 bool SelectCMP_SWAP(SDNode *N);
489
490 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
491 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
492 bool Negate);
493 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
494 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
495
496 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
497 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
498 bool AllowSaturation, SDValue &Imm);
499
500 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
501 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
502 SDValue &Offset);
503 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
504 SDValue &Offset, unsigned Scale = 1);
505
506 bool SelectAllActivePredicate(SDValue N);
507 bool SelectAnyPredicate(SDValue N);
508};
509
510class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
511public:
512 static char ID;
513 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
514 CodeGenOptLevel OptLevel)
 515      : SelectionDAGISelLegacy(
 516            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
517};
518} // end anonymous namespace
519
520char AArch64DAGToDAGISelLegacy::ID = 0;
521
522INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
523
 524/// isIntImmediate - This method tests to see if the node is a constant
 525/// operand. If so, Imm will receive the value.
526static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
527 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
528 Imm = C->getZExtValue();
529 return true;
530 }
531 return false;
532}
533
 534// isIntImmediate - This method tests to see if N is a constant operand.
 535// If so, Imm will receive the value.
536static bool isIntImmediate(SDValue N, uint64_t &Imm) {
537 return isIntImmediate(N.getNode(), Imm);
538}
539
 540// isOpcWithIntImmediate - This method tests to see if the node is a specific
 541// opcode and that it has an immediate integer right operand.
 542// If so, Imm will receive the value.
543static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
544 uint64_t &Imm) {
545 return N->getOpcode() == Opc &&
546 isIntImmediate(N->getOperand(1).getNode(), Imm);
547}
548
549// isIntImmediateEq - This method tests to see if N is a constant operand that
550// is equivalent to 'ImmExpected'.
551#ifndef NDEBUG
552static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
553 uint64_t Imm;
554 if (!isIntImmediate(N.getNode(), Imm))
555 return false;
556 return Imm == ImmExpected;
557}
558#endif
559
560bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
561 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
562 std::vector<SDValue> &OutOps) {
563 switch(ConstraintID) {
564 default:
565 llvm_unreachable("Unexpected asm memory constraint");
566 case InlineAsm::ConstraintCode::m:
567 case InlineAsm::ConstraintCode::o:
568 case InlineAsm::ConstraintCode::Q:
569 // We need to make sure that this one operand does not end up in XZR, thus
570 // require the address to be in a PointerRegClass register.
571 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
572 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
573 SDLoc dl(Op);
574 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
575 SDValue NewOp =
576 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
577 dl, Op.getValueType(),
578 Op, RC), 0);
579 OutOps.push_back(NewOp);
580 return false;
581 }
582 return true;
583}
584
585/// SelectArithImmed - Select an immediate value that can be represented as
586/// a 12-bit value shifted left by either 0 or 12. If so, return true with
587/// Val set to the 12-bit value and Shift set to the shifter operand.
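/// For example, 0xabc is selected as Val=0xabc with LSL #0, and 0xabc000 as
/// Val=0xabc with LSL #12, while 0xabc001 is rejected.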
588bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
589 SDValue &Shift) {
590 // This function is called from the addsub_shifted_imm ComplexPattern,
 591  // which lists [imm] as the list of opcodes it is interested in. However,
592 // we still need to check whether the operand is actually an immediate
593 // here because the ComplexPattern opcode list is only used in
594 // root-level opcode matching.
595 if (!isa<ConstantSDNode>(N.getNode()))
596 return false;
597
598 uint64_t Immed = N.getNode()->getAsZExtVal();
599 unsigned ShiftAmt;
600
601 if (Immed >> 12 == 0) {
602 ShiftAmt = 0;
603 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
604 ShiftAmt = 12;
605 Immed = Immed >> 12;
606 } else
607 return false;
608
609 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
610 SDLoc dl(N);
611 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
612 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
613 return true;
614}
615
616/// SelectNegArithImmed - As above, but negates the value before trying to
617/// select it.
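/// For example, an i32 constant of -5 is negated to 5 and selected as Val=5
/// with LSL #0.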
618bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
619 SDValue &Shift) {
620 // This function is called from the addsub_shifted_imm ComplexPattern,
 621  // which lists [imm] as the list of opcodes it is interested in. However,
622 // we still need to check whether the operand is actually an immediate
623 // here because the ComplexPattern opcode list is only used in
624 // root-level opcode matching.
625 if (!isa<ConstantSDNode>(N.getNode()))
626 return false;
627
628 // The immediate operand must be a 24-bit zero-extended immediate.
629 uint64_t Immed = N.getNode()->getAsZExtVal();
630
631 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
632 // have the opposite effect on the C flag, so this pattern mustn't match under
633 // those circumstances.
634 if (Immed == 0)
635 return false;
636
637 if (N.getValueType() == MVT::i32)
638 Immed = ~((uint32_t)Immed) + 1;
639 else
640 Immed = ~Immed + 1ULL;
641 if (Immed & 0xFFFFFFFFFF000000ULL)
642 return false;
643
644 Immed &= 0xFFFFFFULL;
645 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
646 Shift);
647}
648
649/// getShiftTypeForNode - Translate a shift node to the corresponding
650/// ShiftType value.
 651static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
 652  switch (N.getOpcode()) {
 653  default:
 654    return AArch64_AM::InvalidShiftExtend;
 655  case ISD::SHL:
656 return AArch64_AM::LSL;
657 case ISD::SRL:
658 return AArch64_AM::LSR;
659 case ISD::SRA:
660 return AArch64_AM::ASR;
661 case ISD::ROTR:
662 return AArch64_AM::ROR;
663 }
664}
665
666/// Determine whether it is worth it to fold SHL into the addressing
667/// mode.
 668static bool isWorthFoldingSHL(SDValue V) {
 669  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
670 // It is worth folding logical shift of up to three places.
671 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
672 if (!CSD)
673 return false;
674 unsigned ShiftVal = CSD->getZExtValue();
675 if (ShiftVal > 3)
676 return false;
677
678 // Check if this particular node is reused in any non-memory related
679 // operation. If yes, do not try to fold this node into the address
680 // computation, since the computation will be kept.
681 const SDNode *Node = V.getNode();
682 for (SDNode *UI : Node->users())
683 if (!isa<MemSDNode>(*UI))
684 for (SDNode *UII : UI->users())
685 if (!isa<MemSDNode>(*UII))
686 return false;
687 return true;
688}
689
 690/// Determine whether it is worth folding V into an extended register
 691/// addressing mode.
692bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
693 // Trivial if we are optimizing for code size or if there is only
694 // one use of the value.
695 if (CurDAG->shouldOptForSize() || V.hasOneUse())
696 return true;
697
698 // If a subtarget has a slow shift, folding a shift into multiple loads
699 // costs additional micro-ops.
700 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
701 return false;
702
703 // Check whether we're going to emit the address arithmetic anyway because
704 // it's used by a non-address operation.
705 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
706 return true;
707 if (V.getOpcode() == ISD::ADD) {
708 const SDValue LHS = V.getOperand(0);
709 const SDValue RHS = V.getOperand(1);
710 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
711 return true;
712 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
713 return true;
714 }
715
716 // It hurts otherwise, since the value will be reused.
717 return false;
718}
719
720/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
 721/// in order to select more shifted-register operands.
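/// For example, on i32 the pattern (and (shl x, 2), 0xfffffff0) is rewritten
/// as (UBFMWri x, 2, 31) used with shift LSL #4.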
722bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
723 SDValue &Shift) {
724 EVT VT = N.getValueType();
725 if (VT != MVT::i32 && VT != MVT::i64)
726 return false;
727
728 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
729 return false;
730 SDValue LHS = N.getOperand(0);
731 if (!LHS->hasOneUse())
732 return false;
733
734 unsigned LHSOpcode = LHS->getOpcode();
735 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
736 return false;
737
738 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
739 if (!ShiftAmtNode)
740 return false;
741
742 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
743 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
744 if (!RHSC)
745 return false;
746
747 APInt AndMask = RHSC->getAPIntValue();
748 unsigned LowZBits, MaskLen;
749 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
750 return false;
751
752 unsigned BitWidth = N.getValueSizeInBits();
753 SDLoc DL(LHS);
754 uint64_t NewShiftC;
755 unsigned NewShiftOp;
756 if (LHSOpcode == ISD::SHL) {
757 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
758 // BitWidth != LowZBits + MaskLen doesn't match the pattern
759 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
760 return false;
761
762 NewShiftC = LowZBits - ShiftAmtC;
763 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
764 } else {
765 if (LowZBits == 0)
766 return false;
767
768 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
769 NewShiftC = LowZBits + ShiftAmtC;
770 if (NewShiftC >= BitWidth)
771 return false;
772
 773    // SRA needs all high bits
774 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
775 return false;
776
777 // SRL high bits can be 0 or 1
778 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
779 return false;
780
781 if (LHSOpcode == ISD::SRL)
782 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
783 else
784 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
785 }
786
787 assert(NewShiftC < BitWidth && "Invalid shift amount");
788 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
789 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
790 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
791 NewShiftAmt, BitWidthMinus1),
792 0);
793 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
794 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
795 return true;
796}
797
798/// getExtendTypeForNode - Translate an extend node to the corresponding
799/// ExtendType value.
 800static AArch64_AM::ShiftExtendType
 801getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
802 if (N.getOpcode() == ISD::SIGN_EXTEND ||
803 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
804 EVT SrcVT;
805 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
806 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
807 else
808 SrcVT = N.getOperand(0).getValueType();
809
810 if (!IsLoadStore && SrcVT == MVT::i8)
811 return AArch64_AM::SXTB;
812 else if (!IsLoadStore && SrcVT == MVT::i16)
813 return AArch64_AM::SXTH;
814 else if (SrcVT == MVT::i32)
815 return AArch64_AM::SXTW;
816 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
817
819 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
820 N.getOpcode() == ISD::ANY_EXTEND) {
821 EVT SrcVT = N.getOperand(0).getValueType();
822 if (!IsLoadStore && SrcVT == MVT::i8)
823 return AArch64_AM::UXTB;
824 else if (!IsLoadStore && SrcVT == MVT::i16)
825 return AArch64_AM::UXTH;
826 else if (SrcVT == MVT::i32)
827 return AArch64_AM::UXTW;
828 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
829
831 } else if (N.getOpcode() == ISD::AND) {
832 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
833 if (!CSD)
 834      return AArch64_AM::InvalidShiftExtend;
 835    uint64_t AndMask = CSD->getZExtValue();
836
837 switch (AndMask) {
838 default:
 839      return AArch64_AM::InvalidShiftExtend;
 840    case 0xFF:
841 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
842 case 0xFFFF:
843 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
844 case 0xFFFFFFFF:
845 return AArch64_AM::UXTW;
846 }
847 }
848
 848
 849  return AArch64_AM::InvalidShiftExtend;
 850}
851
 852/// Determine whether it is worth folding V into an extended register of an
 853/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
 854/// instruction, and the shift should be treated as worth folding even if it
 855/// has multiple uses.
856bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
857 // Trivial if we are optimizing for code size or if there is only
858 // one use of the value.
859 if (CurDAG->shouldOptForSize() || V.hasOneUse())
860 return true;
861
862 // If a subtarget has a fastpath LSL we can fold a logical shift into
863 // the add/sub and save a cycle.
864 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
865 V.getConstantOperandVal(1) <= 4 &&
 866      !isa<LoadSDNode>(V.getOperand(0)))
 867    return true;
868
869 // It hurts otherwise, since the value will be reused.
870 return false;
871}
872
873/// SelectShiftedRegister - Select a "shifted register" operand. If the value
874/// is not shifted, set the Shift operand to default of "LSL 0". The logical
875/// instructions allow the shifted register to be rotated, but the arithmetic
876/// instructions do not. The AllowROR parameter specifies whether ROR is
877/// supported.
878bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
879 SDValue &Reg, SDValue &Shift) {
880 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
881 return true;
882
884 if (ShType == AArch64_AM::InvalidShiftExtend)
885 return false;
886 if (!AllowROR && ShType == AArch64_AM::ROR)
887 return false;
888
889 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
890 unsigned BitSize = N.getValueSizeInBits();
891 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
892 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
893
894 Reg = N.getOperand(0);
895 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
896 return isWorthFoldingALU(N, true);
897 }
898
899 return false;
900}
901
902/// Instructions that accept extend modifiers like UXTW expect the register
903/// being extended to be a GPR32, but the incoming DAG might be acting on a
904/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
905/// this is the case.
 906static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
 907  if (N.getValueType() == MVT::i32)
908 return N;
909
910 SDLoc dl(N);
911 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
912}
913
914// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
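// For example, with Scale=16 a constant multiplier of 48 yields an RDVL
// immediate of 3, provided 3 lies within [Low, High].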
915template<signed Low, signed High, signed Scale>
916bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
917 if (!isa<ConstantSDNode>(N))
918 return false;
919
920 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
921 if ((MulImm % std::abs(Scale)) == 0) {
922 int64_t RDVLImm = MulImm / Scale;
923 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
924 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
925 return true;
926 }
927 }
928
929 return false;
930}
931
 932/// SelectArithExtendedRegister - Select an "extended register" operand. This
933/// operand folds in an extend followed by an optional left shift.
934bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
935 SDValue &Shift) {
936 unsigned ShiftVal = 0;
 937  AArch64_AM::ShiftExtendType Ext;
 938
939 if (N.getOpcode() == ISD::SHL) {
940 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
941 if (!CSD)
942 return false;
943 ShiftVal = CSD->getZExtValue();
944 if (ShiftVal > 4)
945 return false;
946
947 Ext = getExtendTypeForNode(N.getOperand(0));
 948    if (Ext == AArch64_AM::InvalidShiftExtend)
 949      return false;
950
951 Reg = N.getOperand(0).getOperand(0);
952 } else {
 953    Ext = getExtendTypeForNode(N);
 954    if (Ext == AArch64_AM::InvalidShiftExtend)
 955      return false;
956
957 Reg = N.getOperand(0);
958
959 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
960 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
961 auto isDef32 = [](SDValue N) {
962 unsigned Opc = N.getOpcode();
963 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
964 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
965 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
966 Opc != ISD::FREEZE;
967 };
968 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
969 isDef32(Reg))
970 return false;
971 }
972
973 // AArch64 mandates that the RHS of the operation must use the smallest
974 // register class that could contain the size being extended from. Thus,
975 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
976 // there might not be an actual 32-bit value in the program. We can
 977  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
978 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
979 Reg = narrowIfNeeded(CurDAG, Reg);
980 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
981 MVT::i32);
982 return isWorthFoldingALU(N);
983}
984
 985/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
 986/// operand is referred to by instructions that have an SP operand.
987bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
988 SDValue &Shift) {
989 unsigned ShiftVal = 0;
 990  AArch64_AM::ShiftExtendType Ext;
 991
992 if (N.getOpcode() != ISD::SHL)
993 return false;
994
995 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
996 if (!CSD)
997 return false;
998 ShiftVal = CSD->getZExtValue();
999 if (ShiftVal > 4)
1000 return false;
1001
 1002  Ext = AArch64_AM::UXTX;
 1003  Reg = N.getOperand(0);
1004 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1005 MVT::i32);
1006 return isWorthFoldingALU(N);
1007}
1008
1009/// If there's a use of this ADDlow that's not itself a load/store then we'll
1010/// need to create a real ADD instruction from it anyway and there's no point in
1011/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1012/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1013/// leads to duplicated ADRP instructions.
 1014static bool isWorthFoldingADDlow(SDValue N) {
 1015  for (auto *User : N->users()) {
1016 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1017 User->getOpcode() != ISD::ATOMIC_LOAD &&
1018 User->getOpcode() != ISD::ATOMIC_STORE)
1019 return false;
1020
1021 // ldar and stlr have much more restrictive addressing modes (just a
1022 // register).
1023 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1024 return false;
1025 }
1026
1027 return true;
1028}
1029
1030/// Check if the immediate offset is valid as a scaled immediate.
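/// For example, with Range=0x1000 and Size=8, any multiple of 8 in the range
/// [0, 32760] is valid.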
1031static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1032 unsigned Size) {
1033 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1034 Offset < (Range << Log2_32(Size)))
1035 return true;
1036 return false;
1037}
1038
1039/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1040/// immediate" address. The "Size" argument is the size in bytes of the memory
1041/// reference, which determines the scale.
1042bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1043 unsigned BW, unsigned Size,
1044 SDValue &Base,
1045 SDValue &OffImm) {
1046 SDLoc dl(N);
1047 const DataLayout &DL = CurDAG->getDataLayout();
1048 const TargetLowering *TLI = getTargetLowering();
1049 if (N.getOpcode() == ISD::FrameIndex) {
1050 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1051 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1052 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1053 return true;
1054 }
1055
 1056  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
 1057  // signed modes selected here don't support labels/immediates, only base+offset.
1058 if (CurDAG->isBaseWithConstantOffset(N)) {
1059 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1060 if (IsSignedImm) {
1061 int64_t RHSC = RHS->getSExtValue();
1062 unsigned Scale = Log2_32(Size);
1063 int64_t Range = 0x1LL << (BW - 1);
1064
1065 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1066 RHSC < (Range << Scale)) {
1067 Base = N.getOperand(0);
1068 if (Base.getOpcode() == ISD::FrameIndex) {
1069 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1070 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1071 }
1072 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1073 return true;
1074 }
1075 } else {
1076 // unsigned Immediate
1077 uint64_t RHSC = RHS->getZExtValue();
1078 unsigned Scale = Log2_32(Size);
1079 uint64_t Range = 0x1ULL << BW;
1080
1081 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1082 Base = N.getOperand(0);
1083 if (Base.getOpcode() == ISD::FrameIndex) {
1084 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1085 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1086 }
1087 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1088 return true;
1089 }
1090 }
1091 }
1092 }
1093 // Base only. The address will be materialized into a register before
1094 // the memory is accessed.
1095 // add x0, Xbase, #offset
1096 // stp x1, x2, [x0]
1097 Base = N;
1098 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1099 return true;
1100}
1101
1102/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1103/// immediate" address. The "Size" argument is the size in bytes of the memory
1104/// reference, which determines the scale.
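/// For example, with Size=8 an address of the form (base + 32) is selected as
/// Base=base and OffImm=4, i.e. the byte offset scaled down by 8.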
1105bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1106 SDValue &Base, SDValue &OffImm) {
1107 SDLoc dl(N);
1108 const DataLayout &DL = CurDAG->getDataLayout();
1109 const TargetLowering *TLI = getTargetLowering();
1110 if (N.getOpcode() == ISD::FrameIndex) {
1111 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1112 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1113 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1114 return true;
1115 }
1116
1117 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1118 GlobalAddressSDNode *GAN =
1119 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1120 Base = N.getOperand(0);
1121 OffImm = N.getOperand(1);
1122 if (!GAN)
1123 return true;
1124
1125 if (GAN->getOffset() % Size == 0 &&
 1126        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
 1127      return true;
1128 }
1129
1130 if (CurDAG->isBaseWithConstantOffset(N)) {
1131 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1132 int64_t RHSC = (int64_t)RHS->getZExtValue();
1133 unsigned Scale = Log2_32(Size);
1134 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1135 Base = N.getOperand(0);
1136 if (Base.getOpcode() == ISD::FrameIndex) {
1137 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1138 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1139 }
1140 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1141 return true;
1142 }
1143 }
1144 }
1145
1146 // Before falling back to our general case, check if the unscaled
1147 // instructions can handle this. If so, that's preferable.
1148 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1149 return false;
1150
1151 // Base only. The address will be materialized into a register before
1152 // the memory is accessed.
1153 // add x0, Xbase, #offset
1154 // ldr x0, [x0]
1155 Base = N;
1156 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1157 return true;
1158}
1159
1160/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1161/// immediate" address. This should only match when there is an offset that
1162/// is not valid for a scaled immediate addressing mode. The "Size" argument
1163/// is the size in bytes of the memory reference, which is needed here to know
1164/// what is valid for a scaled immediate.
1165bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1166 SDValue &Base,
1167 SDValue &OffImm) {
1168 if (!CurDAG->isBaseWithConstantOffset(N))
1169 return false;
1170 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1171 int64_t RHSC = RHS->getSExtValue();
1172 if (RHSC >= -256 && RHSC < 256) {
1173 Base = N.getOperand(0);
1174 if (Base.getOpcode() == ISD::FrameIndex) {
1175 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1176 const TargetLowering *TLI = getTargetLowering();
1177 Base = CurDAG->getTargetFrameIndex(
1178 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1179 }
1180 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1181 return true;
1182 }
1183 }
1184 return false;
1185}
1186
 1187static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
 1188  SDLoc dl(N);
1189 SDValue ImpDef = SDValue(
1190 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1191 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1192 N);
1193}
1194
1195/// Check if the given SHL node (\p N), can be used to form an
1196/// extended register for an addressing mode.
1197bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1198 bool WantExtend, SDValue &Offset,
1199 SDValue &SignExtend) {
1200 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1201 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1202 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1203 return false;
1204
1205 SDLoc dl(N);
1206 if (WantExtend) {
 1207    AArch64_AM::ShiftExtendType Ext =
 1208        getExtendTypeForNode(N.getOperand(0), true);
 1209    if (Ext == AArch64_AM::InvalidShiftExtend)
 1210      return false;
1211
1212 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1213 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1214 MVT::i32);
1215 } else {
1216 Offset = N.getOperand(0);
1217 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1218 }
1219
1220 unsigned LegalShiftVal = Log2_32(Size);
1221 unsigned ShiftVal = CSD->getZExtValue();
1222
1223 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1224 return false;
1225
1226 return isWorthFoldingAddr(N, Size);
1227}
1228
1229bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
 1230                                            SDValue &Base, SDValue &Offset,
 1231                                            SDValue &SignExtend,
1232 SDValue &DoShift) {
1233 if (N.getOpcode() != ISD::ADD)
1234 return false;
1235 SDValue LHS = N.getOperand(0);
1236 SDValue RHS = N.getOperand(1);
1237 SDLoc dl(N);
1238
1239 // We don't want to match immediate adds here, because they are better lowered
1240 // to the register-immediate addressing modes.
1241 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1242 return false;
1243
1244 // Check if this particular node is reused in any non-memory related
1245 // operation. If yes, do not try to fold this node into the address
1246 // computation, since the computation will be kept.
1247 const SDNode *Node = N.getNode();
1248 for (SDNode *UI : Node->users()) {
1249 if (!isa<MemSDNode>(*UI))
1250 return false;
1251 }
1252
1253 // Remember if it is worth folding N when it produces extended register.
1254 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1255
1256 // Try to match a shifted extend on the RHS.
1257 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1258 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1259 Base = LHS;
1260 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1261 return true;
1262 }
1263
1264 // Try to match a shifted extend on the LHS.
1265 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1266 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1267 Base = RHS;
1268 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1269 return true;
1270 }
1271
1272 // There was no shift, whatever else we find.
1273 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1274
 1275  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
 1276  // Try to match an unshifted extend on the LHS.
1277 if (IsExtendedRegisterWorthFolding &&
1278 (Ext = getExtendTypeForNode(LHS, true)) !=
 1279          AArch64_AM::InvalidShiftExtend) {
 1280    Base = RHS;
1281 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1282 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1283 MVT::i32);
1284 if (isWorthFoldingAddr(LHS, Size))
1285 return true;
1286 }
1287
1288 // Try to match an unshifted extend on the RHS.
1289 if (IsExtendedRegisterWorthFolding &&
1290 (Ext = getExtendTypeForNode(RHS, true)) !=
 1291          AArch64_AM::InvalidShiftExtend) {
 1292    Base = LHS;
1293 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1294 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1295 MVT::i32);
1296 if (isWorthFoldingAddr(RHS, Size))
1297 return true;
1298 }
1299
1300 return false;
1301}
1302
 1303// Check if the given immediate is preferred by ADD. If an immediate can be
 1304// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
 1305// encoded by one MOVZ, return true.
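// For example, 0x123000 is preferred (it needs "ADD LSL #12" and cannot be
// built with one MOVZ), whereas 0x7000 is not, since a single MOVZ covers it.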
1306static bool isPreferredADD(int64_t ImmOff) {
1307 // Constant in [0x0, 0xfff] can be encoded in ADD.
1308 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1309 return true;
1310 // Check if it can be encoded in an "ADD LSL #12".
1311 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
 1312    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1313 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1314 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1315 return false;
1316}
1317
1318bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
 1319                                            SDValue &Base, SDValue &Offset,
 1320                                            SDValue &SignExtend,
1321 SDValue &DoShift) {
1322 if (N.getOpcode() != ISD::ADD)
1323 return false;
1324 SDValue LHS = N.getOperand(0);
1325 SDValue RHS = N.getOperand(1);
1326 SDLoc DL(N);
1327
1328 // Check if this particular node is reused in any non-memory related
1329 // operation. If yes, do not try to fold this node into the address
1330 // computation, since the computation will be kept.
1331 const SDNode *Node = N.getNode();
1332 for (SDNode *UI : Node->users()) {
1333 if (!isa<MemSDNode>(*UI))
1334 return false;
1335 }
1336
 1337  // Watch out if RHS is a wide immediate: it cannot be selected into the
 1338  // [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
 1339  // either. Instead the [BaseReg + 0] address mode is used, generating
 1340  // instructions like:
1341 // MOV X0, WideImmediate
1342 // ADD X1, BaseReg, X0
1343 // LDR X2, [X1, 0]
1344 // For such situation, using [BaseReg, XReg] addressing mode can save one
1345 // ADD/SUB:
1346 // MOV X0, WideImmediate
1347 // LDR X2, [BaseReg, X0]
1348 if (isa<ConstantSDNode>(RHS)) {
1349 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
 1350    // Skip if the immediate can be selected by the load/store addressing
 1351    // mode, or if it can be encoded by a single ADD (SUB is also checked
 1352    // by using -ImmOff).
1353 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1354 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1355 return false;
1356
1357 SDValue Ops[] = { RHS };
1358 SDNode *MOVI =
1359 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1360 SDValue MOVIV = SDValue(MOVI, 0);
 1361    // This ADD of two X registers will be selected into [Reg+Reg] mode.
1362 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1363 }
1364
1365 // Remember if it is worth folding N when it produces extended register.
1366 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1367
1368 // Try to match a shifted extend on the RHS.
1369 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1370 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1371 Base = LHS;
1372 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1373 return true;
1374 }
1375
1376 // Try to match a shifted extend on the LHS.
1377 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1378 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1379 Base = RHS;
1380 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1381 return true;
1382 }
1383
1384 // Match any non-shifted, non-extend, non-immediate add expression.
1385 Base = LHS;
1386 Offset = RHS;
1387 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1388 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1389 // Reg1 + Reg2 is free: no check needed.
1390 return true;
1391}
1392
1393SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1394 static const unsigned RegClassIDs[] = {
1395 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1396 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1397 AArch64::dsub2, AArch64::dsub3};
1398
1399 return createTuple(Regs, RegClassIDs, SubRegs);
1400}
1401
1402SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1403 static const unsigned RegClassIDs[] = {
1404 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1405 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1406 AArch64::qsub2, AArch64::qsub3};
1407
1408 return createTuple(Regs, RegClassIDs, SubRegs);
1409}
1410
1411SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1412 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1413 AArch64::ZPR3RegClassID,
1414 AArch64::ZPR4RegClassID};
1415 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1416 AArch64::zsub2, AArch64::zsub3};
1417
1418 return createTuple(Regs, RegClassIDs, SubRegs);
1419}
1420
1421SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1422 assert(Regs.size() == 2 || Regs.size() == 4);
1423
1424 // The createTuple interface requires 3 RegClassIDs for each possible
1425 // tuple type even though we only have them for ZPR2 and ZPR4.
1426 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1427 AArch64::ZPR4Mul4RegClassID};
1428 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1429 AArch64::zsub2, AArch64::zsub3};
1430 return createTuple(Regs, RegClassIDs, SubRegs);
1431}
1432
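// For example, a 2-element Q-register tuple becomes a REG_SEQUENCE whose
// operands are {QQRegClassID, Vec0, qsub0, Vec1, qsub1}.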
1433SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1434 const unsigned RegClassIDs[],
1435 const unsigned SubRegs[]) {
1436 // There's no special register-class for a vector-list of 1 element: it's just
1437 // a vector.
1438 if (Regs.size() == 1)
1439 return Regs[0];
1440
1441 assert(Regs.size() >= 2 && Regs.size() <= 4);
1442
1443 SDLoc DL(Regs[0]);
1444
 1445  SmallVector<SDValue, 4> Ops;
 1446
1447 // First operand of REG_SEQUENCE is the desired RegClass.
1448 Ops.push_back(
1449 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1450
1451 // Then we get pairs of source & subregister-position for the components.
1452 for (unsigned i = 0; i < Regs.size(); ++i) {
1453 Ops.push_back(Regs[i]);
1454 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1455 }
1456
1457 SDNode *N =
1458 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1459 return SDValue(N, 0);
1460}
1461
1462void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1463 bool isExt) {
1464 SDLoc dl(N);
1465 EVT VT = N->getValueType(0);
1466
1467 unsigned ExtOff = isExt;
1468
1469 // Form a REG_SEQUENCE to force register allocation.
1470 unsigned Vec0Off = ExtOff + 1;
1471 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1472 N->op_begin() + Vec0Off + NumVecs);
1473 SDValue RegSeq = createQTuple(Regs);
1474
 1475  SmallVector<SDValue, 6> Ops;
 1476  if (isExt)
1477 Ops.push_back(N->getOperand(1));
1478 Ops.push_back(RegSeq);
1479 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1480 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1481}
1482
1483static std::tuple<SDValue, SDValue>
 1484extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
 1485  SDLoc DL(Disc);
1486 SDValue AddrDisc;
1487 SDValue ConstDisc;
1488
1489 // If this is a blend, remember the constant and address discriminators.
1490 // Otherwise, it's either a constant discriminator, or a non-blended
1491 // address discriminator.
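  // For example, a ptrauth.blend(addr, 1234) discriminator splits into
  // AddrDisc=addr and ConstDisc=1234; a bare constant 1234 yields
  // ConstDisc=1234 with AddrDisc defaulting to XZR below.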
1492 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1493 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1494 AddrDisc = Disc->getOperand(1);
1495 ConstDisc = Disc->getOperand(2);
1496 } else {
1497 ConstDisc = Disc;
1498 }
1499
1500 // If the constant discriminator (either the blend RHS, or the entire
1501 // discriminator value) isn't a 16-bit constant, bail out, and let the
1502 // discriminator be computed separately.
1503 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1504 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1505 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1506
1507 // If there's no address discriminator, use XZR directly.
1508 if (!AddrDisc)
1509 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1510
1511 return std::make_tuple(
1512 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1513 AddrDisc);
1514}
1515
1516void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1517 SDLoc DL(N);
1518 // IntrinsicID is operand #0
1519 SDValue Val = N->getOperand(1);
1520 SDValue AUTKey = N->getOperand(2);
1521 SDValue AUTDisc = N->getOperand(3);
1522
1523 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1524 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1525
1526 SDValue AUTAddrDisc, AUTConstDisc;
1527 std::tie(AUTConstDisc, AUTAddrDisc) =
1528 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1529
1530 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1531 AArch64::X16, Val, SDValue());
1532 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1533
1534 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1535 ReplaceNode(N, AUT);
1536}
1537
1538void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1539 SDLoc DL(N);
1540 // IntrinsicID is operand #0
1541 SDValue Val = N->getOperand(1);
1542 SDValue AUTKey = N->getOperand(2);
1543 SDValue AUTDisc = N->getOperand(3);
1544 SDValue PACKey = N->getOperand(4);
1545 SDValue PACDisc = N->getOperand(5);
1546
1547 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1548 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1549
1550 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1551 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1552
1553 SDValue AUTAddrDisc, AUTConstDisc;
1554 std::tie(AUTConstDisc, AUTAddrDisc) =
1555 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1556
1557 SDValue PACAddrDisc, PACConstDisc;
1558 std::tie(PACConstDisc, PACAddrDisc) =
1559 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1560
1561 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1562 AArch64::X16, Val, SDValue());
1563
1564 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1565 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1566
1567 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1568 ReplaceNode(N, AUTPAC);
1569}
1570
1571bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1572 LoadSDNode *LD = cast<LoadSDNode>(N);
1573 if (LD->isUnindexed())
1574 return false;
1575 EVT VT = LD->getMemoryVT();
1576 EVT DstVT = N->getValueType(0);
1577 ISD::MemIndexedMode AM = LD->getAddressingMode();
1578 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1579
1580 // We're not doing validity checking here. That was done when checking
1581 // if we should mark the load as indexed or not. We're just selecting
1582 // the right instruction.
1583 unsigned Opcode = 0;
1584
1585 ISD::LoadExtType ExtType = LD->getExtensionType();
1586 bool InsertTo64 = false;
1587 if (VT == MVT::i64)
1588 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1589 else if (VT == MVT::i32) {
1590 if (ExtType == ISD::NON_EXTLOAD)
1591 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1592 else if (ExtType == ISD::SEXTLOAD)
1593 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1594 else {
1595 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1596 InsertTo64 = true;
1597 // The result of the load is only i32. It's the subreg_to_reg that makes
1598 // it into an i64.
1599 DstVT = MVT::i32;
1600 }
1601 } else if (VT == MVT::i16) {
1602 if (ExtType == ISD::SEXTLOAD) {
1603 if (DstVT == MVT::i64)
1604 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1605 else
1606 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1607 } else {
1608 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1609 InsertTo64 = DstVT == MVT::i64;
1610 // The result of the load is only i32. It's the subreg_to_reg that makes
1611 // it into an i64.
1612 DstVT = MVT::i32;
1613 }
1614 } else if (VT == MVT::i8) {
1615 if (ExtType == ISD::SEXTLOAD) {
1616 if (DstVT == MVT::i64)
1617 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1618 else
1619 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1620 } else {
1621 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1622 InsertTo64 = DstVT == MVT::i64;
1623 // The result of the load is only i32. It's the subreg_to_reg that makes
1624 // it into an i64.
1625 DstVT = MVT::i32;
1626 }
1627 } else if (VT == MVT::f16) {
1628 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1629 } else if (VT == MVT::bf16) {
1630 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1631 } else if (VT == MVT::f32) {
1632 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1633 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1634 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1635 } else if (VT.is128BitVector()) {
1636 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1637 } else
1638 return false;
1639 SDValue Chain = LD->getChain();
1640 SDValue Base = LD->getBasePtr();
1641 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1642 int OffsetVal = (int)OffsetOp->getZExtValue();
1643 SDLoc dl(N);
1644 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1645 SDValue Ops[] = { Base, Offset, Chain };
1646 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1647 MVT::Other, Ops);
1648
1649 // Transfer memoperands.
1650 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1651 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1652
1653 // Either way, we're replacing the node, so tell the caller that.
1654 SDValue LoadedVal = SDValue(Res, 1);
1655 if (InsertTo64) {
1656 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1657 LoadedVal =
1658 SDValue(CurDAG->getMachineNode(
1659 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1660 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1661 SubReg),
1662 0);
1663 }
1664
1665 ReplaceUses(SDValue(N, 0), LoadedVal);
1666 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1667 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1668 CurDAG->RemoveDeadNode(N);
1669 return true;
1670}
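// [Illustrative note, not part of the original source] For example, a
// post-increment zero-extending i8 load whose result is used as i64 is
// selected above as LDRBBpost (an i32-producing load) and then widened with
// SUBREG_TO_REG over sub_32. The machine node produces
// (writeback base : i64, loaded value, chain), which is remapped onto the
// indexed LoadSDNode's results (value, writeback base, chain).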
1671
1672void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1673 unsigned SubRegIdx) {
1674 SDLoc dl(N);
1675 EVT VT = N->getValueType(0);
1676 SDValue Chain = N->getOperand(0);
1677
1678 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1679 Chain};
1680
1681 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1682
1683 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1684 SDValue SuperReg = SDValue(Ld, 0);
1685 for (unsigned i = 0; i < NumVecs; ++i)
1686 ReplaceUses(SDValue(N, i),
1687 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1688
1689 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1690
1691 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1692 // because it's too simple to have needed special treatment during lowering.
1693 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1694 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1695 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1696 }
1697
1698 CurDAG->RemoveDeadNode(N);
1699}
1700
1701void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1702 unsigned Opc, unsigned SubRegIdx) {
1703 SDLoc dl(N);
1704 EVT VT = N->getValueType(0);
1705 SDValue Chain = N->getOperand(0);
1706
1707 SDValue Ops[] = {N->getOperand(1), // Mem operand
1708 N->getOperand(2), // Incremental
1709 Chain};
1710
1711 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1712 MVT::Untyped, MVT::Other};
1713
1714 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1715
1716 // Update uses of write back register
1717 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1718
1719 // Update uses of vector list
1720 SDValue SuperReg = SDValue(Ld, 1);
1721 if (NumVecs == 1)
1722 ReplaceUses(SDValue(N, 0), SuperReg);
1723 else
1724 for (unsigned i = 0; i < NumVecs; ++i)
1725 ReplaceUses(SDValue(N, i),
1726 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1727
1728 // Update the chain
1729 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1730 CurDAG->RemoveDeadNode(N);
1731}
1732
1733/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1734/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1735/// new Base and an SDValue representing the new offset.
1736std::tuple<unsigned, SDValue, SDValue>
1737AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1738 unsigned Opc_ri,
1739 const SDValue &OldBase,
1740 const SDValue &OldOffset,
1741 unsigned Scale) {
1742 SDValue NewBase = OldBase;
1743 SDValue NewOffset = OldOffset;
1744 // Detect a possible Reg+Imm addressing mode.
1745 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1746 N, OldBase, NewBase, NewOffset);
1747
1748 // Detect a possible reg+reg addressing mode, but only if we haven't already
1749 // detected a Reg+Imm one.
1750 const bool IsRegReg =
1751 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1752
1753 // Select the instruction.
1754 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1755}
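// [Illustrative note, not part of the original source] Net effect of the
// helper above: prefer the reg+imm form when the offset is a whole number of
// SVE registers in [-8, 7] (e.g. "[x0, #2, mul vl]"); otherwise try the scaled
// reg+reg form (e.g. "[x0, x1, lsl #Scale]"); if neither matches, the _IMM
// opcode is kept with the original base and an offset of #0.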
1756
1757enum class SelectTypeKind {
1758 Int1 = 0,
1759 Int = 1,
1760 FP = 2,
1761 AnyType = 3,
1762};
1763
1764/// This function selects an opcode from a list of opcodes, which is
1765/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1766/// element types, in this order.
1767template <SelectTypeKind Kind>
1768static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1769 // Only match scalable vector VTs
1770 if (!VT.isScalableVector())
1771 return 0;
1772
1773 EVT EltVT = VT.getVectorElementType();
1774 unsigned Key = VT.getVectorMinNumElements();
1775 switch (Kind) {
1776   case SelectTypeKind::AnyType:
1777     break;
1778   case SelectTypeKind::Int:
1779     if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1780 EltVT != MVT::i64)
1781 return 0;
1782 break;
1783   case SelectTypeKind::Int1:
1784     if (EltVT != MVT::i1)
1785 return 0;
1786 break;
1787 case SelectTypeKind::FP:
1788 if (EltVT == MVT::bf16)
1789 Key = 16;
1790 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1791 EltVT != MVT::f64)
1792 return 0;
1793 break;
1794 }
1795
1796 unsigned Offset;
1797 switch (Key) {
1798 case 16: // 8-bit or bf16
1799 Offset = 0;
1800 break;
1801 case 8: // 16-bit
1802 Offset = 1;
1803 break;
1804 case 4: // 32-bit
1805 Offset = 2;
1806 break;
1807 case 2: // 64-bit
1808 Offset = 3;
1809 break;
1810 default:
1811 return 0;
1812 }
1813
1814 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1815}
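// [Illustrative note, not part of the original source] Worked examples of the
// mapping above, assuming Opcodes = { 8-bit, 16-bit, 32-bit, 64-bit }:
//   nxv16i8 (Key 16) -> Opcodes[0]      nxv4i32 (Key 4) -> Opcodes[2]
//   nxv8f16 (Key 8)  -> Opcodes[1]      nxv2f64 (Key 2) -> Opcodes[3]
//   nxv8bf16         -> Key is forced to 16, so it shares Opcodes[0] with the
//                       8-bit slot, matching the "8-bit or bf16" case above.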
1816
1817// This function is almost identical to SelectWhilePair, but has an
1818// extra check on the range of the immediate operand.
1819// TODO: Merge these two functions together at some point?
1820void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1821 // Immediate can be either 0 or 1.
1822 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1823 if (Imm->getZExtValue() > 1)
1824 return;
1825
1826 SDLoc DL(N);
1827 EVT VT = N->getValueType(0);
1828 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1829 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1830 SDValue SuperReg = SDValue(WhilePair, 0);
1831
1832 for (unsigned I = 0; I < 2; ++I)
1833 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1834 AArch64::psub0 + I, DL, VT, SuperReg));
1835
1836 CurDAG->RemoveDeadNode(N);
1837}
1838
1839void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1840 SDLoc DL(N);
1841 EVT VT = N->getValueType(0);
1842
1843 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1844
1845 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1846 SDValue SuperReg = SDValue(WhilePair, 0);
1847
1848 for (unsigned I = 0; I < 2; ++I)
1849 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1850 AArch64::psub0 + I, DL, VT, SuperReg));
1851
1852 CurDAG->RemoveDeadNode(N);
1853}
1854
1855void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1856 unsigned Opcode) {
1857 EVT VT = N->getValueType(0);
1858 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1859 SDValue Ops = createZTuple(Regs);
1860 SDLoc DL(N);
1861 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1862 SDValue SuperReg = SDValue(Intrinsic, 0);
1863 for (unsigned i = 0; i < NumVecs; ++i)
1864 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1865 AArch64::zsub0 + i, DL, VT, SuperReg));
1866
1867 CurDAG->RemoveDeadNode(N);
1868}
1869
1870void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1871 unsigned Opcode) {
1872 SDLoc DL(N);
1873 EVT VT = N->getValueType(0);
1874 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1875 Ops.push_back(/*Chain*/ N->getOperand(0));
1876
1877   SDNode *Instruction =
1878       CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1879 SDValue SuperReg = SDValue(Instruction, 0);
1880
1881 for (unsigned i = 0; i < NumVecs; ++i)
1882 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1883 AArch64::zsub0 + i, DL, VT, SuperReg));
1884
1885 // Copy chain
1886 unsigned ChainIdx = NumVecs;
1887 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1888 CurDAG->RemoveDeadNode(N);
1889}
1890
1891void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1892 unsigned NumVecs,
1893 bool IsZmMulti,
1894 unsigned Opcode,
1895 bool HasPred) {
1896 assert(Opcode != 0 && "Unexpected opcode");
1897
1898 SDLoc DL(N);
1899 EVT VT = N->getValueType(0);
1900 unsigned FirstVecIdx = HasPred ? 2 : 1;
1901
1902 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1903 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1904 return createZMulTuple(Regs);
1905 };
1906
1907 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1908
1909 SDValue Zm;
1910 if (IsZmMulti)
1911 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1912 else
1913 Zm = N->getOperand(NumVecs + FirstVecIdx);
1914
1915   SDNode *Intrinsic;
1916   if (HasPred)
1917 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1918 N->getOperand(1), Zdn, Zm);
1919 else
1920 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1921 SDValue SuperReg = SDValue(Intrinsic, 0);
1922 for (unsigned i = 0; i < NumVecs; ++i)
1923 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1924 AArch64::zsub0 + i, DL, VT, SuperReg));
1925
1926 CurDAG->RemoveDeadNode(N);
1927}
1928
1929void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1930 unsigned Scale, unsigned Opc_ri,
1931 unsigned Opc_rr, bool IsIntr) {
1932 assert(Scale < 5 && "Invalid scaling value.");
1933 SDLoc DL(N);
1934 EVT VT = N->getValueType(0);
1935 SDValue Chain = N->getOperand(0);
1936
1937 // Optimize addressing mode.
1938   SDValue Base, Offset;
1939   unsigned Opc;
1940 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1941 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1942 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1943
1944 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1945 Base, // Memory operand
1946 Offset, Chain};
1947
1948 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1949
1950 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1951 SDValue SuperReg = SDValue(Load, 0);
1952 for (unsigned i = 0; i < NumVecs; ++i)
1953 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1954 AArch64::zsub0 + i, DL, VT, SuperReg));
1955
1956 // Copy chain
1957 unsigned ChainIdx = NumVecs;
1958 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1959 CurDAG->RemoveDeadNode(N);
1960}
1961
1962void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1963 unsigned NumVecs,
1964 unsigned Scale,
1965 unsigned Opc_ri,
1966 unsigned Opc_rr) {
1967 assert(Scale < 4 && "Invalid scaling value.");
1968 SDLoc DL(N);
1969 EVT VT = N->getValueType(0);
1970 SDValue Chain = N->getOperand(0);
1971
1972 SDValue PNg = N->getOperand(2);
1973 SDValue Base = N->getOperand(3);
1974 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1975 unsigned Opc;
1976 std::tie(Opc, Base, Offset) =
1977 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1978
1979 SDValue Ops[] = {PNg, // Predicate-as-counter
1980 Base, // Memory operand
1981 Offset, Chain};
1982
1983 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1984
1985 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1986 SDValue SuperReg = SDValue(Load, 0);
1987 for (unsigned i = 0; i < NumVecs; ++i)
1988 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1989 AArch64::zsub0 + i, DL, VT, SuperReg));
1990
1991 // Copy chain
1992 unsigned ChainIdx = NumVecs;
1993 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1994 CurDAG->RemoveDeadNode(N);
1995}
1996
1997void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1998 unsigned Opcode) {
1999 if (N->getValueType(0) != MVT::nxv4f32)
2000 return;
2001 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2002}
2003
2004void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2005 unsigned NumOutVecs,
2006 unsigned Opc,
2007 uint32_t MaxImm) {
2008 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2009 if (Imm->getZExtValue() > MaxImm)
2010 return;
2011
2012 SDValue ZtValue;
2013 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2014 return;
2015
2016 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2017 SDLoc DL(Node);
2018 EVT VT = Node->getValueType(0);
2019
2020   SDNode *Instruction =
2021       CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2022 SDValue SuperReg = SDValue(Instruction, 0);
2023
2024 for (unsigned I = 0; I < NumOutVecs; ++I)
2025 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2026 AArch64::zsub0 + I, DL, VT, SuperReg));
2027
2028 // Copy chain
2029 unsigned ChainIdx = NumOutVecs;
2030 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2031 CurDAG->RemoveDeadNode(Node);
2032}
2033
2034void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2035 unsigned NumOutVecs,
2036 unsigned Opc) {
2037
2038 SDValue ZtValue;
2039   SmallVector<SDValue, 4> Ops;
2040   if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2041 return;
2042
2043 Ops.push_back(ZtValue);
2044 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2045 SDLoc DL(Node);
2046 EVT VT = Node->getValueType(0);
2047
2048   SDNode *Instruction =
2049       CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2050 SDValue SuperReg = SDValue(Instruction, 0);
2051
2052 for (unsigned I = 0; I < NumOutVecs; ++I)
2053 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2054 AArch64::zsub0 + I, DL, VT, SuperReg));
2055
2056 // Copy chain
2057 unsigned ChainIdx = NumOutVecs;
2058 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2059 CurDAG->RemoveDeadNode(Node);
2060}
2061
2062void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2063 unsigned Op) {
2064 SDLoc DL(N);
2065 EVT VT = N->getValueType(0);
2066
2067 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2068 SDValue Zd = createZMulTuple(Regs);
2069 SDValue Zn = N->getOperand(1 + NumVecs);
2070 SDValue Zm = N->getOperand(2 + NumVecs);
2071
2072 SDValue Ops[] = {Zd, Zn, Zm};
2073
2074 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2075 SDValue SuperReg = SDValue(Intrinsic, 0);
2076 for (unsigned i = 0; i < NumVecs; ++i)
2077 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2078 AArch64::zsub0 + i, DL, VT, SuperReg));
2079
2080 CurDAG->RemoveDeadNode(N);
2081}
2082
2083bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2084 switch (BaseReg) {
2085 default:
2086 return false;
2087 case AArch64::ZA:
2088 case AArch64::ZAB0:
2089 if (TileNum == 0)
2090 break;
2091 return false;
2092 case AArch64::ZAH0:
2093 if (TileNum <= 1)
2094 break;
2095 return false;
2096 case AArch64::ZAS0:
2097 if (TileNum <= 3)
2098 break;
2099 return false;
2100 case AArch64::ZAD0:
2101 if (TileNum <= 7)
2102 break;
2103 return false;
2104 }
2105
2106 BaseReg += TileNum;
2107 return true;
2108}
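// [Illustrative note, not part of the original source] SelectSMETile relies on
// the ZA tile registers being laid out consecutively, so e.g. BaseReg ZAS0 with
// TileNum 2 becomes ZAS2, while ZAB0 (or plain ZA) only accepts tile 0 and ZAH0
// accepts tiles 0-1; an out-of-range tile number makes it return false.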
2109
2110template <unsigned MaxIdx, unsigned Scale>
2111void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2112 unsigned BaseReg, unsigned Op) {
2113 unsigned TileNum = 0;
2114 if (BaseReg != AArch64::ZA)
2115 TileNum = N->getConstantOperandVal(2);
2116
2117 if (!SelectSMETile(BaseReg, TileNum))
2118 return;
2119
2120 SDValue SliceBase, Base, Offset;
2121 if (BaseReg == AArch64::ZA)
2122 SliceBase = N->getOperand(2);
2123 else
2124 SliceBase = N->getOperand(3);
2125
2126 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2127 return;
2128
2129 SDLoc DL(N);
2130 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2131 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2132 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2133
2134 EVT VT = N->getValueType(0);
2135 for (unsigned I = 0; I < NumVecs; ++I)
2136 ReplaceUses(SDValue(N, I),
2137 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2138 SDValue(Mov, 0)));
2139 // Copy chain
2140 unsigned ChainIdx = NumVecs;
2141 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2142 CurDAG->RemoveDeadNode(N);
2143}
2144
2145void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2146 unsigned Op, unsigned MaxIdx,
2147 unsigned Scale, unsigned BaseReg) {
2148   // The slice operand can appear in different positions:
2149   //   array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2150   //   tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2151 SDValue SliceBase = N->getOperand(2);
2152 if (BaseReg != AArch64::ZA)
2153 SliceBase = N->getOperand(3);
2154
2155   SDValue Base, Offset;
2156   if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2157 return;
2158   // The correct ZA tile number is computed when the machine instruction is
2159   // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2160   // register with ZReg.
2161 SDLoc DL(N);
2162   SmallVector<SDValue, 4> Ops;
2163   if (BaseReg != AArch64::ZA)
2164 Ops.push_back(N->getOperand(2));
2165 Ops.push_back(Base);
2166 Ops.push_back(Offset);
2167   Ops.push_back(N->getOperand(0)); // Chain
2168 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2169
2170 EVT VT = N->getValueType(0);
2171 for (unsigned I = 0; I < NumVecs; ++I)
2172 ReplaceUses(SDValue(N, I),
2173 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2174 SDValue(Mov, 0)));
2175
2176 // Copy chain
2177 unsigned ChainIdx = NumVecs;
2178 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2179 CurDAG->RemoveDeadNode(N);
2180}
2181
2182void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2183 unsigned NumOutVecs,
2184 bool IsTupleInput,
2185 unsigned Opc) {
2186 SDLoc DL(N);
2187 EVT VT = N->getValueType(0);
2188 unsigned NumInVecs = N->getNumOperands() - 1;
2189
2190   SmallVector<SDValue, 4> Ops;
2191   if (IsTupleInput) {
2192 assert((NumInVecs == 2 || NumInVecs == 4) &&
2193 "Don't know how to handle multi-register input!");
2194 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2195 Ops.push_back(createZMulTuple(Regs));
2196 } else {
2197 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2198 for (unsigned I = 0; I < NumInVecs; I++)
2199 Ops.push_back(N->getOperand(1 + I));
2200 }
2201
2202 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2203 SDValue SuperReg = SDValue(Res, 0);
2204
2205 for (unsigned I = 0; I < NumOutVecs; I++)
2206 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2207 AArch64::zsub0 + I, DL, VT, SuperReg));
2208 CurDAG->RemoveDeadNode(N);
2209}
2210
2211void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2212 unsigned Opc) {
2213 SDLoc dl(N);
2214 EVT VT = N->getOperand(2)->getValueType(0);
2215
2216 // Form a REG_SEQUENCE to force register allocation.
2217 bool Is128Bit = VT.getSizeInBits() == 128;
2218 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2219 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2220
2221 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2222 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2223
2224 // Transfer memoperands.
2225 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2226 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2227
2228 ReplaceNode(N, St);
2229}
2230
2231void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2232 unsigned Scale, unsigned Opc_rr,
2233 unsigned Opc_ri) {
2234 SDLoc dl(N);
2235
2236 // Form a REG_SEQUENCE to force register allocation.
2237 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2238 SDValue RegSeq = createZTuple(Regs);
2239
2240 // Optimize addressing mode.
2241 unsigned Opc;
2242   SDValue Base, Offset;
2243   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2244 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2245 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2246
2247 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2248 Base, // address
2249 Offset, // offset
2250 N->getOperand(0)}; // chain
2251 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2252
2253 ReplaceNode(N, St);
2254}
2255
2256bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2257 SDValue &OffImm) {
2258 SDLoc dl(N);
2259 const DataLayout &DL = CurDAG->getDataLayout();
2260 const TargetLowering *TLI = getTargetLowering();
2261
2262 // Try to match it for the frame address
2263 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2264 int FI = FINode->getIndex();
2265 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2266 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2267 return true;
2268 }
2269
2270 return false;
2271}
2272
2273void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2274 unsigned Opc) {
2275 SDLoc dl(N);
2276 EVT VT = N->getOperand(2)->getValueType(0);
2277 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2278 MVT::Other}; // Type for the Chain
2279
2280 // Form a REG_SEQUENCE to force register allocation.
2281 bool Is128Bit = VT.getSizeInBits() == 128;
2282 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2283 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2284
2285 SDValue Ops[] = {RegSeq,
2286 N->getOperand(NumVecs + 1), // base register
2287 N->getOperand(NumVecs + 2), // Incremental
2288 N->getOperand(0)}; // Chain
2289 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2290
2291 ReplaceNode(N, St);
2292}
2293
2294namespace {
2295/// WidenVector - Given a value in the V64 register class, produce the
2296/// equivalent value in the V128 register class.
2297class WidenVector {
2298 SelectionDAG &DAG;
2299
2300public:
2301 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2302
2303 SDValue operator()(SDValue V64Reg) {
2304 EVT VT = V64Reg.getValueType();
2305 unsigned NarrowSize = VT.getVectorNumElements();
2306 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2307 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2308 SDLoc DL(V64Reg);
2309
2310 SDValue Undef =
2311 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2312 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2313 }
2314};
2315} // namespace
2316
2317/// NarrowVector - Given a value in the V128 register class, produce the
2318/// equivalent value in the V64 register class.
2319static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2320   EVT VT = V128Reg.getValueType();
2321 unsigned WideSize = VT.getVectorNumElements();
2322 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2323 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2324
2325 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2326 V128Reg);
2327}
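// [Illustrative note, not part of the original source] WidenVector/NarrowVector
// simply move a value between the D and Q register views: a v2i32 value becomes
// the dsub half of an IMPLICIT_DEF v4i32, and a v4i32 value is narrowed back by
// extracting dsub; no data is rearranged, only the register view changes.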
2328
2329void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2330 unsigned Opc) {
2331 SDLoc dl(N);
2332 EVT VT = N->getValueType(0);
2333 bool Narrow = VT.getSizeInBits() == 64;
2334
2335 // Form a REG_SEQUENCE to force register allocation.
2336 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2337
2338 if (Narrow)
2339 transform(Regs, Regs.begin(),
2340 WidenVector(*CurDAG));
2341
2342 SDValue RegSeq = createQTuple(Regs);
2343
2344 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2345
2346 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2347
2348 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2349 N->getOperand(NumVecs + 3), N->getOperand(0)};
2350 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2351 SDValue SuperReg = SDValue(Ld, 0);
2352
2353 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2354 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2355 AArch64::qsub2, AArch64::qsub3 };
2356 for (unsigned i = 0; i < NumVecs; ++i) {
2357 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2358 if (Narrow)
2359 NV = NarrowVector(NV, *CurDAG);
2360 ReplaceUses(SDValue(N, i), NV);
2361 }
2362
2363 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2364 CurDAG->RemoveDeadNode(N);
2365}
2366
2367void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2368 unsigned Opc) {
2369 SDLoc dl(N);
2370 EVT VT = N->getValueType(0);
2371 bool Narrow = VT.getSizeInBits() == 64;
2372
2373 // Form a REG_SEQUENCE to force register allocation.
2374 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2375
2376 if (Narrow)
2377 transform(Regs, Regs.begin(),
2378 WidenVector(*CurDAG));
2379
2380 SDValue RegSeq = createQTuple(Regs);
2381
2382 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2383 RegSeq->getValueType(0), MVT::Other};
2384
2385 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2386
2387 SDValue Ops[] = {RegSeq,
2388 CurDAG->getTargetConstant(LaneNo, dl,
2389 MVT::i64), // Lane Number
2390 N->getOperand(NumVecs + 2), // Base register
2391 N->getOperand(NumVecs + 3), // Incremental
2392 N->getOperand(0)};
2393 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2394
2395 // Update uses of the write back register
2396 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2397
2398 // Update uses of the vector list
2399 SDValue SuperReg = SDValue(Ld, 1);
2400 if (NumVecs == 1) {
2401 ReplaceUses(SDValue(N, 0),
2402 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2403 } else {
2404 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2405 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2406 AArch64::qsub2, AArch64::qsub3 };
2407 for (unsigned i = 0; i < NumVecs; ++i) {
2408 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2409 SuperReg);
2410 if (Narrow)
2411 NV = NarrowVector(NV, *CurDAG);
2412 ReplaceUses(SDValue(N, i), NV);
2413 }
2414 }
2415
2416 // Update the Chain
2417 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2418 CurDAG->RemoveDeadNode(N);
2419}
2420
2421void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2422 unsigned Opc) {
2423 SDLoc dl(N);
2424 EVT VT = N->getOperand(2)->getValueType(0);
2425 bool Narrow = VT.getSizeInBits() == 64;
2426
2427 // Form a REG_SEQUENCE to force register allocation.
2428 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2429
2430 if (Narrow)
2431 transform(Regs, Regs.begin(),
2432 WidenVector(*CurDAG));
2433
2434 SDValue RegSeq = createQTuple(Regs);
2435
2436 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2437
2438 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2439 N->getOperand(NumVecs + 3), N->getOperand(0)};
2440 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2441
2442 // Transfer memoperands.
2443 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2444 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2445
2446 ReplaceNode(N, St);
2447}
2448
2449void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2450 unsigned Opc) {
2451 SDLoc dl(N);
2452 EVT VT = N->getOperand(2)->getValueType(0);
2453 bool Narrow = VT.getSizeInBits() == 64;
2454
2455 // Form a REG_SEQUENCE to force register allocation.
2456 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2457
2458 if (Narrow)
2459 transform(Regs, Regs.begin(),
2460 WidenVector(*CurDAG));
2461
2462 SDValue RegSeq = createQTuple(Regs);
2463
2464 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2465 MVT::Other};
2466
2467 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2468
2469 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2470 N->getOperand(NumVecs + 2), // Base Register
2471 N->getOperand(NumVecs + 3), // Incremental
2472 N->getOperand(0)};
2473 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2474
2475 // Transfer memoperands.
2476 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2477 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2478
2479 ReplaceNode(N, St);
2480}
2481
2482static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2483                                       unsigned &Opc, SDValue &Opd0,
2484 unsigned &LSB, unsigned &MSB,
2485 unsigned NumberOfIgnoredLowBits,
2486 bool BiggerPattern) {
2487 assert(N->getOpcode() == ISD::AND &&
2488 "N must be a AND operation to call this function");
2489
2490 EVT VT = N->getValueType(0);
2491
2492 // Here we can test the type of VT and return false when the type does not
2493 // match, but since it is done prior to that call in the current context
2494 // we turned that into an assert to avoid redundant code.
2495 assert((VT == MVT::i32 || VT == MVT::i64) &&
2496 "Type checking must have been done before calling this function");
2497
2498 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2499 // changed the AND node to a 32-bit mask operation. We'll have to
2500 // undo that as part of the transform here if we want to catch all
2501 // the opportunities.
2502 // Currently the NumberOfIgnoredLowBits argument helps to recover
2503   // from these situations when matching the bigger pattern (bitfield insert).
2504
2505 // For unsigned extracts, check for a shift right and mask
2506 uint64_t AndImm = 0;
2507 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2508 return false;
2509
2510 const SDNode *Op0 = N->getOperand(0).getNode();
2511
2512 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2513   // simplified. Try to undo that.
2514 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2515
2516 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2517 if (AndImm & (AndImm + 1))
2518 return false;
2519
2520 bool ClampMSB = false;
2521 uint64_t SrlImm = 0;
2522 // Handle the SRL + ANY_EXTEND case.
2523 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2524 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2525 // Extend the incoming operand of the SRL to 64-bit.
2526 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2527 // Make sure to clamp the MSB so that we preserve the semantics of the
2528 // original operations.
2529 ClampMSB = true;
2530 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2531             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2532                                   SrlImm)) {
2533 // If the shift result was truncated, we can still combine them.
2534 Opd0 = Op0->getOperand(0).getOperand(0);
2535
2536 // Use the type of SRL node.
2537 VT = Opd0->getValueType(0);
2538 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2539 Opd0 = Op0->getOperand(0);
2540 ClampMSB = (VT == MVT::i32);
2541 } else if (BiggerPattern) {
2542 // Let's pretend a 0 shift right has been performed.
2543 // The resulting code will be at least as good as the original one
2544 // plus it may expose more opportunities for bitfield insert pattern.
2545 // FIXME: Currently we limit this to the bigger pattern, because
2546 // some optimizations expect AND and not UBFM.
2547 Opd0 = N->getOperand(0);
2548 } else
2549 return false;
2550
2551 // Bail out on large immediates. This happens when no proper
2552 // combining/constant folding was performed.
2553 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2554 LLVM_DEBUG(
2555 (dbgs() << N
2556 << ": Found large shift immediate, this should not happen\n"));
2557 return false;
2558 }
2559
2560 LSB = SrlImm;
2561 MSB = SrlImm +
2562 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2563 : llvm::countr_one<uint64_t>(AndImm)) -
2564 1;
2565 if (ClampMSB)
2566 // Since we're moving the extend before the right shift operation, we need
2567 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2568 // the zeros which would get shifted in with the original right shift
2569 // operation.
2570 MSB = MSB > 31 ? 31 : MSB;
2571
2572 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2573 return true;
2574}
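// [Illustrative note, not part of the original source] A typical hit for the
// routine above: on i64, (and (srl X, 4), 0xff) yields Opd0 = X, LSB = 4 and
// MSB = 4 + countr_one(0xff) - 1 = 11, i.e. UBFMXri X, #4, #11, which extracts
// bits [4, 11] of X into the low bits of the result.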
2575
2576static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2577 SDValue &Opd0, unsigned &Immr,
2578 unsigned &Imms) {
2579 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2580
2581 EVT VT = N->getValueType(0);
2582 unsigned BitWidth = VT.getSizeInBits();
2583 assert((VT == MVT::i32 || VT == MVT::i64) &&
2584 "Type checking must have been done before calling this function");
2585
2586 SDValue Op = N->getOperand(0);
2587 if (Op->getOpcode() == ISD::TRUNCATE) {
2588 Op = Op->getOperand(0);
2589 VT = Op->getValueType(0);
2590 BitWidth = VT.getSizeInBits();
2591 }
2592
2593 uint64_t ShiftImm;
2594 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2595 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2596 return false;
2597
2598 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2599 if (ShiftImm + Width > BitWidth)
2600 return false;
2601
2602 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2603 Opd0 = Op.getOperand(0);
2604 Immr = ShiftImm;
2605 Imms = ShiftImm + Width - 1;
2606 return true;
2607}
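// [Illustrative note, not part of the original source] Example for the routine
// above: sign_extend_inreg((srl X, 3), i8) on i32 gives Immr = 3 and
// Imms = 3 + 8 - 1 = 10, i.e. SBFMWri X, #3, #10, which sign-extends the 8-bit
// field that starts at bit 3.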
2608
2609static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2610 SDValue &Opd0, unsigned &LSB,
2611 unsigned &MSB) {
2612 // We are looking for the following pattern which basically extracts several
2613   // contiguous bits from the source value and places them starting at the LSB
2614   // of the destination value; all other bits of the destination are set to zero:
2615 //
2616 // Value2 = AND Value, MaskImm
2617 // SRL Value2, ShiftImm
2618 //
2619   // where MaskImm >> ShiftImm is a low-bits mask whose length gives the bit width.
2620 //
2621 // This gets selected into a single UBFM:
2622 //
2623 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2624 //
2625
2626 if (N->getOpcode() != ISD::SRL)
2627 return false;
2628
2629 uint64_t AndMask = 0;
2630 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2631 return false;
2632
2633 Opd0 = N->getOperand(0).getOperand(0);
2634
2635 uint64_t SrlImm = 0;
2636 if (!isIntImmediate(N->getOperand(1), SrlImm))
2637 return false;
2638
2639 // Check whether we really have several bits extract here.
2640 if (!isMask_64(AndMask >> SrlImm))
2641 return false;
2642
2643 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2644 LSB = SrlImm;
2645 MSB = llvm::Log2_64(AndMask);
2646 return true;
2647}
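// [Illustrative note, not part of the original source] Example for the pattern
// above: (srl (and X, 0xff0), 4) has AndMask >> SrlImm == 0xff (a low-bits
// mask), so it becomes UBFM X, #4, #Log2_64(0xff0) = UBFM X, #4, #11, i.e. an
// extraction of bits [4, 11].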
2648
2649static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2650 unsigned &Immr, unsigned &Imms,
2651 bool BiggerPattern) {
2652 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2653 "N must be a SHR/SRA operation to call this function");
2654
2655 EVT VT = N->getValueType(0);
2656
2657 // Here we can test the type of VT and return false when the type does not
2658 // match, but since it is done prior to that call in the current context
2659 // we turned that into an assert to avoid redundant code.
2660 assert((VT == MVT::i32 || VT == MVT::i64) &&
2661 "Type checking must have been done before calling this function");
2662
2663 // Check for AND + SRL doing several bits extract.
2664 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2665 return true;
2666
2667 // We're looking for a shift of a shift.
2668 uint64_t ShlImm = 0;
2669 uint64_t TruncBits = 0;
2670 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2671 Opd0 = N->getOperand(0).getOperand(0);
2672 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2673 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2674     // We are looking for a shift of a truncate. Truncating from i64 to i32 can
2675     // be considered as setting the high 32 bits to zero. Our strategy here is to
2676     // always generate a 64-bit UBFM. This consistency will help the CSE pass
2677     // later find more redundancy.
2678 Opd0 = N->getOperand(0).getOperand(0);
2679 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2680 VT = Opd0.getValueType();
2681 assert(VT == MVT::i64 && "the promoted type should be i64");
2682 } else if (BiggerPattern) {
2683 // Let's pretend a 0 shift left has been performed.
2684 // FIXME: Currently we limit this to the bigger pattern case,
2685 // because some optimizations expect AND and not UBFM
2686 Opd0 = N->getOperand(0);
2687 } else
2688 return false;
2689
2690 // Missing combines/constant folding may have left us with strange
2691 // constants.
2692 if (ShlImm >= VT.getSizeInBits()) {
2693 LLVM_DEBUG(
2694 (dbgs() << N
2695 << ": Found large shift immediate, this should not happen\n"));
2696 return false;
2697 }
2698
2699 uint64_t SrlImm = 0;
2700 if (!isIntImmediate(N->getOperand(1), SrlImm))
2701 return false;
2702
2703 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2704 "bad amount in shift node!");
2705 int immr = SrlImm - ShlImm;
2706 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2707 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2708 // SRA requires a signed extraction
2709 if (VT == MVT::i32)
2710 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2711 else
2712 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2713 return true;
2714}
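// [Illustrative note, not part of the original source] Example for the
// shift-of-shift case above: on i64, (srl (shl X, 8), 12) gives
// Immr = 12 - 8 = 4 and Imms = 64 - 8 - 1 = 55, i.e. UBFMXri X, #4, #55,
// which extracts bits [4, 55] of X -- exactly the value of (X << 8) >> 12.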
2715
2716bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2717 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2718
2719 EVT VT = N->getValueType(0);
2720 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2721 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2722 return false;
2723
2724 uint64_t ShiftImm;
2725 SDValue Op = N->getOperand(0);
2726 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2727 return false;
2728
2729 SDLoc dl(N);
2730 // Extend the incoming operand of the shift to 64-bits.
2731 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2732 unsigned Immr = ShiftImm;
2733 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2734 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2735 CurDAG->getTargetConstant(Imms, dl, VT)};
2736 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2737 return true;
2738}
2739
2740static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2741 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2742 unsigned NumberOfIgnoredLowBits = 0,
2743 bool BiggerPattern = false) {
2744 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2745 return false;
2746
2747 switch (N->getOpcode()) {
2748 default:
2749 if (!N->isMachineOpcode())
2750 return false;
2751 break;
2752 case ISD::AND:
2753 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2754 NumberOfIgnoredLowBits, BiggerPattern);
2755 case ISD::SRL:
2756 case ISD::SRA:
2757 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2758
2759   case ISD::SIGN_EXTEND_INREG:
2760     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2761 }
2762
2763 unsigned NOpc = N->getMachineOpcode();
2764 switch (NOpc) {
2765 default:
2766 return false;
2767 case AArch64::SBFMWri:
2768 case AArch64::UBFMWri:
2769 case AArch64::SBFMXri:
2770 case AArch64::UBFMXri:
2771 Opc = NOpc;
2772 Opd0 = N->getOperand(0);
2773 Immr = N->getConstantOperandVal(1);
2774 Imms = N->getConstantOperandVal(2);
2775 return true;
2776 }
2777 // Unreachable
2778 return false;
2779}
2780
2781bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2782 unsigned Opc, Immr, Imms;
2783 SDValue Opd0;
2784 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2785 return false;
2786
2787 EVT VT = N->getValueType(0);
2788 SDLoc dl(N);
2789
2790 // If the bit extract operation is 64bit but the original type is 32bit, we
2791 // need to add one EXTRACT_SUBREG.
2792 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2793 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2794 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2795
2796 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2797 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2798 MVT::i32, SDValue(BFM, 0));
2799 ReplaceNode(N, Inner.getNode());
2800 return true;
2801 }
2802
2803 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2804 CurDAG->getTargetConstant(Imms, dl, VT)};
2805 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2806 return true;
2807}
2808
2809/// Does DstMask form a complementary pair with the mask provided by
2810/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2811/// this asks whether DstMask zeroes precisely those bits that will be set by
2812/// the other half.
2813static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2814 unsigned NumberOfIgnoredHighBits, EVT VT) {
2815 assert((VT == MVT::i32 || VT == MVT::i64) &&
2816 "i32 or i64 mask type expected!");
2817 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2818
2819 // Enable implicitTrunc as we're intentionally ignoring high bits.
2820 APInt SignificantDstMask =
2821 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2822 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2823
2824 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2825 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2826}
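// [Illustrative note, not part of the original source] Example: with VT = i32
// and no ignored high bits, DstMask = 0xffff0000 is complementary to an
// inserted value whose known bits are 0x0000ffff (their AND is 0 and their OR
// is all ones), so the pair qualifies for a BFI/BFXIL.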
2827
2828// Look for bits that will be useful for later uses.
2829 // A bit is considered useless as soon as it is dropped and never used
2830 // before it has been dropped.
2831 // E.g., looking for the useful bits of x
2832// 1. y = x & 0x7
2833// 2. z = y >> 2
2834// After #1, x useful bits are 0x7, then the useful bits of x, live through
2835// y.
2836// After #2, the useful bits of x are 0x4.
2837 // However, if x is used by an unpredictable instruction, then all its bits
2838// are useful.
2839// E.g.
2840// 1. y = x & 0x7
2841// 2. z = y >> 2
2842// 3. str x, [@x]
2843static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2844
2845static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2846                                              unsigned Depth) {
2847 uint64_t Imm =
2848 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2849 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2850 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2851 getUsefulBits(Op, UsefulBits, Depth + 1);
2852}
2853
2854static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2855                                              uint64_t Imm, uint64_t MSB,
2856 unsigned Depth) {
2857 // inherit the bitwidth value
2858 APInt OpUsefulBits(UsefulBits);
2859 OpUsefulBits = 1;
2860
2861 if (MSB >= Imm) {
2862 OpUsefulBits <<= MSB - Imm + 1;
2863 --OpUsefulBits;
2864 // The interesting part will be in the lower part of the result
2865 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2866 // The interesting part was starting at Imm in the argument
2867 OpUsefulBits <<= Imm;
2868 } else {
2869 OpUsefulBits <<= MSB + 1;
2870 --OpUsefulBits;
2871 // The interesting part will be shifted in the result
2872 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2873 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2874 // The interesting part was at zero in the argument
2875 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2876 }
2877
2878 UsefulBits &= OpUsefulBits;
2879}
2880
2881static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2882 unsigned Depth) {
2883 uint64_t Imm =
2884 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2885 uint64_t MSB =
2886 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2887
2888 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2889}
2890
2891static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2892                                                 unsigned Depth) {
2893 uint64_t ShiftTypeAndValue =
2894 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2895 APInt Mask(UsefulBits);
2896 Mask.clearAllBits();
2897 Mask.flipAllBits();
2898
2899 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2900 // Shift Left
2901 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2902 Mask <<= ShiftAmt;
2903 getUsefulBits(Op, Mask, Depth + 1);
2904 Mask.lshrInPlace(ShiftAmt);
2905 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2906 // Shift Right
2907 // We do not handle AArch64_AM::ASR, because the sign will change the
2908 // number of useful bits
2909 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2910 Mask.lshrInPlace(ShiftAmt);
2911 getUsefulBits(Op, Mask, Depth + 1);
2912 Mask <<= ShiftAmt;
2913 } else
2914 return;
2915
2916 UsefulBits &= Mask;
2917}
2918
2919static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2920 unsigned Depth) {
2921 uint64_t Imm =
2922 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2923 uint64_t MSB =
2924 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2925
2926 APInt OpUsefulBits(UsefulBits);
2927 OpUsefulBits = 1;
2928
2929 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2930 ResultUsefulBits.flipAllBits();
2931 APInt Mask(UsefulBits.getBitWidth(), 0);
2932
2933 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2934
2935 if (MSB >= Imm) {
2936 // The instruction is a BFXIL.
2937 uint64_t Width = MSB - Imm + 1;
2938 uint64_t LSB = Imm;
2939
2940 OpUsefulBits <<= Width;
2941 --OpUsefulBits;
2942
2943 if (Op.getOperand(1) == Orig) {
2944 // Copy the low bits from the result to bits starting from LSB.
2945 Mask = ResultUsefulBits & OpUsefulBits;
2946 Mask <<= LSB;
2947 }
2948
2949 if (Op.getOperand(0) == Orig)
2950 // Bits starting from LSB in the input contribute to the result.
2951 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2952 } else {
2953 // The instruction is a BFI.
2954 uint64_t Width = MSB + 1;
2955 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2956
2957 OpUsefulBits <<= Width;
2958 --OpUsefulBits;
2959 OpUsefulBits <<= LSB;
2960
2961 if (Op.getOperand(1) == Orig) {
2962 // Copy the bits from the result to the zero bits.
2963 Mask = ResultUsefulBits & OpUsefulBits;
2964 Mask.lshrInPlace(LSB);
2965 }
2966
2967 if (Op.getOperand(0) == Orig)
2968 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2969 }
2970
2971 UsefulBits &= Mask;
2972}
2973
2974static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2975 SDValue Orig, unsigned Depth) {
2976
2977 // Users of this node should have already been instruction selected
2978 // FIXME: Can we turn that into an assert?
2979 if (!UserNode->isMachineOpcode())
2980 return;
2981
2982 switch (UserNode->getMachineOpcode()) {
2983 default:
2984 return;
2985 case AArch64::ANDSWri:
2986 case AArch64::ANDSXri:
2987 case AArch64::ANDWri:
2988 case AArch64::ANDXri:
2989 // We increment Depth only when we call the getUsefulBits
2990 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2991 Depth);
2992 case AArch64::UBFMWri:
2993 case AArch64::UBFMXri:
2994 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2995
2996 case AArch64::ORRWrs:
2997 case AArch64::ORRXrs:
2998 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2999 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3000 Depth);
3001 return;
3002 case AArch64::BFMWri:
3003 case AArch64::BFMXri:
3004 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3005
3006 case AArch64::STRBBui:
3007 case AArch64::STURBBi:
3008 if (UserNode->getOperand(0) != Orig)
3009 return;
3010 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3011 return;
3012
3013 case AArch64::STRHHui:
3014 case AArch64::STURHHi:
3015 if (UserNode->getOperand(0) != Orig)
3016 return;
3017 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3018 return;
3019 }
3020}
3021
3022static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3023   if (Depth >= SelectionDAG::MaxRecursionDepth)
3024     return;
3025 // Initialize UsefulBits
3026 if (!Depth) {
3027 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3028     // At the beginning, assume every produced bit is useful
3029 UsefulBits = APInt(Bitwidth, 0);
3030 UsefulBits.flipAllBits();
3031 }
3032 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3033
3034 for (SDNode *Node : Op.getNode()->users()) {
3035 // A use cannot produce useful bits
3036 APInt UsefulBitsForUse = APInt(UsefulBits);
3037 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3038 UsersUsefulBits |= UsefulBitsForUse;
3039 }
3040 // UsefulBits contains the produced bits that are meaningful for the
3041 // current definition, thus a user cannot make a bit meaningful at
3042 // this point
3043 UsefulBits &= UsersUsefulBits;
3044}
3045
3046/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3047/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3048/// 0, return Op unchanged.
3049static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3050 if (ShlAmount == 0)
3051 return Op;
3052
3053 EVT VT = Op.getValueType();
3054 SDLoc dl(Op);
3055 unsigned BitWidth = VT.getSizeInBits();
3056 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3057
3058 SDNode *ShiftNode;
3059 if (ShlAmount > 0) {
3060 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3061 ShiftNode = CurDAG->getMachineNode(
3062 UBFMOpc, dl, VT, Op,
3063 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3064 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3065 } else {
3066 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3067 assert(ShlAmount < 0 && "expected right shift");
3068 int ShrAmount = -ShlAmount;
3069 ShiftNode = CurDAG->getMachineNode(
3070 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3071 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3072 }
3073
3074 return SDValue(ShiftNode, 0);
3075}
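// [Illustrative note, not part of the original source] The UBFM encodings used
// by getLeftShift, spelled out for a 64-bit operand:
//   LSL x0, x1, #8  ==  UBFMXri x1, #(64 - 8), #(63 - 8)  ==  UBFMXri x1, #56, #55
//   LSR x0, x1, #8  ==  UBFMXri x1, #8, #63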
3076
3077// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3078static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3079                                             bool BiggerPattern,
3080 const uint64_t NonZeroBits,
3081 SDValue &Src, int &DstLSB,
3082 int &Width);
3083
3084// For bit-field-positioning pattern "shl VAL, N)".
3085static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3086                                             bool BiggerPattern,
3087 const uint64_t NonZeroBits,
3088 SDValue &Src, int &DstLSB,
3089 int &Width);
3090
3091/// Does this tree qualify as an attempt to move a bitfield into position,
3092/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3093static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3094                                 bool BiggerPattern, SDValue &Src,
3095 int &DstLSB, int &Width) {
3096 EVT VT = Op.getValueType();
3097 unsigned BitWidth = VT.getSizeInBits();
3098 (void)BitWidth;
3099 assert(BitWidth == 32 || BitWidth == 64);
3100
3101 KnownBits Known = CurDAG->computeKnownBits(Op);
3102
3103 // Non-zero in the sense that they're not provably zero, which is the key
3104 // point if we want to use this value
3105 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3106 if (!isShiftedMask_64(NonZeroBits))
3107 return false;
3108
3109 switch (Op.getOpcode()) {
3110 default:
3111 break;
3112 case ISD::AND:
3113 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3114 NonZeroBits, Src, DstLSB, Width);
3115 case ISD::SHL:
3116 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3117 NonZeroBits, Src, DstLSB, Width);
3118 }
3119
3120 return false;
3121}
3122
3123static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3124                                             bool BiggerPattern,
3125 const uint64_t NonZeroBits,
3126 SDValue &Src, int &DstLSB,
3127 int &Width) {
3128 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3129
3130 EVT VT = Op.getValueType();
3131 assert((VT == MVT::i32 || VT == MVT::i64) &&
3132 "Caller guarantees VT is one of i32 or i64");
3133 (void)VT;
3134
3135 uint64_t AndImm;
3136 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3137 return false;
3138
3139 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3140   // 1) (AndImm & (1 << POS)) == 0
3141 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3142 //
3143 // 1) and 2) don't agree so something must be wrong (e.g., in
3144 // 'SelectionDAG::computeKnownBits')
3145 assert((~AndImm & NonZeroBits) == 0 &&
3146 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3147
3148 SDValue AndOp0 = Op.getOperand(0);
3149
3150 uint64_t ShlImm;
3151 SDValue ShlOp0;
3152 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3153 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3154 ShlOp0 = AndOp0.getOperand(0);
3155 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3156             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3157                                   ShlImm)) {
3158 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3159
3160 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3161 SDValue ShlVal = AndOp0.getOperand(0);
3162
3163 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3164 // expect VT to be MVT::i32.
3165 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3166
3167 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3168 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3169 } else
3170 return false;
3171
3172 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3173 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3174 // AndOp0+AND.
3175 if (!BiggerPattern && !AndOp0.hasOneUse())
3176 return false;
3177
3178 DstLSB = llvm::countr_zero(NonZeroBits);
3179 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3180
3181 // Bail out on large Width. This happens when no proper combining / constant
3182 // folding was performed.
3183 if (Width >= (int)VT.getSizeInBits()) {
3184     // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3185 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3186 // "val".
3187 // If VT is i32, what Width >= 32 means:
3188     // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3189     //   demands at least 'Width' bits (after dag-combiner). This, together with
3190     //   the `any_extend` Op (undefined higher bits), indicates a missed combine
3191     //   when lowering the 'and' IR instruction to a machine IR instruction.
3192 LLVM_DEBUG(
3193 dbgs()
3194 << "Found large Width in bit-field-positioning -- this indicates no "
3195 "proper combining / constant folding was performed\n");
3196 return false;
3197 }
3198
3199 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3200 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3201 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3202 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3203 // which case it is not profitable to insert an extra shift.
3204 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3205 return false;
3206
3207 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3208 return true;
3209}
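// [Illustrative note, not part of the original source] Example for the routine
// above: (and (shl X, 3), 0x1f8) has NonZeroBits = 0x1f8, so DstLSB = 3 and
// Width = 6; since ShlImm == DstLSB no extra shift is needed and Src is X,
// letting the caller form a UBFIZ/BFI that places a 6-bit field at bit 3.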
3210
3211// For node (shl (and val, mask), N), returns true if the node is equivalent to
3212// UBFIZ.
3213static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3214                                               SDValue &Src, int &DstLSB,
3215 int &Width) {
3216 // The caller should have verified that Op is a left shift with a constant
3217 // shift amount; the asserts below check that.
3218 assert(Op.getOpcode() == ISD::SHL &&
3219 "Op.getNode() should be a SHL node to call this function");
3220 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3221 "Op.getNode() should shift ShlImm to call this function");
3222
3223 uint64_t AndImm = 0;
3224 SDValue Op0 = Op.getOperand(0);
3225 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3226 return false;
3227
3228 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3229 if (isMask_64(ShiftedAndImm)) {
3230 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3231 // should end with Mask, and could be prefixed with random bits if those
3232 // bits are shifted out.
3233 //
3234 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3235 // the AND result corresponding to those bits are shifted out, so it's fine
3236 // to not extract them.
3237 Width = llvm::countr_one(ShiftedAndImm);
3238 DstLSB = ShlImm;
3239 Src = Op0.getOperand(0);
3240 return true;
3241 }
3242 return false;
3243}
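// Illustrative worked example (not part of the original source): for
// Op = (shl (and x, 0xFF), 8) on i64, ShiftedAndImm = ((0xFF << 8) >> 8) = 0xFF,
// which is a mask, so Width = 8, DstLSB = 8 and Src = x, i.e. the node behaves
// like "ubfiz xD, xS, #8, #8" (register names are placeholders). An AndImm such
// as 0xFF000000000000FF is also accepted, because the extra high byte is
// shifted out by the SHL.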
3244
3245 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3246 bool BiggerPattern,
3247 const uint64_t NonZeroBits,
3248 SDValue &Src, int &DstLSB,
3249 int &Width) {
3250 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3251
3252 EVT VT = Op.getValueType();
3253 assert((VT == MVT::i32 || VT == MVT::i64) &&
3254 "Caller guarantees that type is i32 or i64");
3255 (void)VT;
3256
3257 uint64_t ShlImm;
3258 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3259 return false;
3260
3261 if (!BiggerPattern && !Op.hasOneUse())
3262 return false;
3263
3264 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3265 return true;
3266
3267 DstLSB = llvm::countr_zero(NonZeroBits);
3268 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3269
3270 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3271 return false;
3272
3273 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3274 return true;
3275}
3276
3277static bool isShiftedMask(uint64_t Mask, EVT VT) {
3278 assert(VT == MVT::i32 || VT == MVT::i64);
3279 if (VT == MVT::i32)
3280 return isShiftedMask_32(Mask);
3281 return isShiftedMask_64(Mask);
3282}
3283
3284// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3285// inserted only sets known zero bits.
3286 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3287 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3288
3289 EVT VT = N->getValueType(0);
3290 if (VT != MVT::i32 && VT != MVT::i64)
3291 return false;
3292
3293 unsigned BitWidth = VT.getSizeInBits();
3294
3295 uint64_t OrImm;
3296 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3297 return false;
3298
3299 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3300 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3301 // performance neutral.
3302 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3303 return false;
3304
3305 uint64_t MaskImm;
3306 SDValue And = N->getOperand(0);
3307 // Must be a single use AND with an immediate operand.
3308 if (!And.hasOneUse() ||
3309 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3310 return false;
3311
3312 // Compute the Known Zero for the AND as this allows us to catch more general
3313 // cases than just looking for AND with imm.
3314 KnownBits Known = CurDAG->computeKnownBits(And);
3315
3316 // Non-zero in the sense that they're not provably zero, which is the key
3317 // point if we want to use this value.
3318 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3319
3320 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3321 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3322 return false;
3323
3324 // The bits being inserted must only set those bits that are known to be zero.
3325 if ((OrImm & NotKnownZero) != 0) {
3326 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3327 // currently handle this case.
3328 return false;
3329 }
3330
3331 // BFI/BFXIL dst, src, #lsb, #width.
3332 int LSB = llvm::countr_one(NotKnownZero);
3333 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3334
3335 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3336 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3337 unsigned ImmS = Width - 1;
3338
3339 // If we're creating a BFI instruction, avoid cases where we need more
3340 // instructions to materialize the BFI constant as compared to the original
3341 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3342 // should be no worse in this case.
3343 bool IsBFI = LSB != 0;
3344 uint64_t BFIImm = OrImm >> LSB;
3345 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3346 // We have a BFI instruction and we know the constant can't be materialized
3347 // with a ORR-immediate with the zero register.
3348 unsigned OrChunks = 0, BFIChunks = 0;
3349 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3350 if (((OrImm >> Shift) & 0xFFFF) != 0)
3351 ++OrChunks;
3352 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3353 ++BFIChunks;
3354 }
3355 if (BFIChunks > OrChunks)
3356 return false;
3357 }
3358
3359 // Materialize the constant to be inserted.
3360 SDLoc DL(N);
3361 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3362 SDNode *MOVI = CurDAG->getMachineNode(
3363 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3364
3365 // Create the BFI/BFXIL instruction.
3366 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3367 CurDAG->getTargetConstant(ImmR, DL, VT),
3368 CurDAG->getTargetConstant(ImmS, DL, VT)};
3369 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3370 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3371 return true;
3372}
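// Illustrative worked example (not part of the original source), assuming the
// AND has a single use and nothing else is known about x's bits: for
// "(or (and x, 0xFFFFFF00), 0x5A)" on i32, 0x5A is not a valid logical
// immediate, NotKnownZero = 0xFFFFFF00, so LSB = 0 and Width = 8, giving
// ImmR = 0 and ImmS = 7, i.e. the BFXIL form:
//   mov   w8, #0x5a
//   bfxil wX, w8, #0, #8   ; wX holds x (register names are placeholders)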
3373
3374 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3375 SDValue &ShiftedOperand,
3376 uint64_t &EncodedShiftImm) {
3377 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3378 if (!Dst.hasOneUse())
3379 return false;
3380
3381 EVT VT = Dst.getValueType();
3382 assert((VT == MVT::i32 || VT == MVT::i64) &&
3383 "Caller should guarantee that VT is one of i32 or i64");
3384 const unsigned SizeInBits = VT.getSizeInBits();
3385
3386 SDLoc DL(Dst.getNode());
3387 uint64_t AndImm, ShlImm;
3388 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3389 isShiftedMask_64(AndImm)) {
3390 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3391 SDValue DstOp0 = Dst.getOperand(0);
3392 if (!DstOp0.hasOneUse())
3393 return false;
3394
3395 // An example to illustrate the transformation
3396 // From:
3397 // lsr x8, x1, #1
3398 // and x8, x8, #0x3f80
3399 // bfxil x8, x1, #0, #7
3400 // To:
3401 // and x9, x1, #0x7f
3402 // ubfx x10, x1, #8, #7
3403 // orr x8, x9, x10, lsl #7
3404 //
3405 // The number of instructions remains the same, but ORR is faster than BFXIL
3406 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3407 // the dependency chain is improved after the transformation.
3408 uint64_t SrlImm;
3409 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3410 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3411 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3412 unsigned MaskWidth =
3413 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3414 unsigned UBFMOpc =
3415 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3416 SDNode *UBFMNode = CurDAG->getMachineNode(
3417 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3418 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3419 VT),
3420 CurDAG->getTargetConstant(
3421 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3422 ShiftedOperand = SDValue(UBFMNode, 0);
3423 EncodedShiftImm = AArch64_AM::getShifterImm(
3424 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3425 return true;
3426 }
3427 }
3428 return false;
3429 }
3430
3431 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3432 ShiftedOperand = Dst.getOperand(0);
3433 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3434 return true;
3435 }
3436
3437 uint64_t SrlImm;
3438 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3439 ShiftedOperand = Dst.getOperand(0);
3440 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3441 return true;
3442 }
3443 return false;
3444}
3445
3446// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3447// the operands and select it to AArch64::ORR with shifted registers if
3448// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3449static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3450 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3451 const bool BiggerPattern) {
3452 EVT VT = N->getValueType(0);
3453 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3454 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3455 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3456 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3457 assert((VT == MVT::i32 || VT == MVT::i64) &&
3458 "Expect result type to be i32 or i64 since N is combinable to BFM");
3459 SDLoc DL(N);
3460
3461 // Bail out if BFM simplifies away one node in BFM Dst.
3462 if (OrOpd1 != Dst)
3463 return false;
3464
3465 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3466 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3467 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3468 if (BiggerPattern) {
3469 uint64_t SrcAndImm;
3470 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3471 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3472 // OrOpd0 = AND Src, #Mask
3473 // So BFM simplifies away one AND node from Src and doesn't simplify away
3474 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3475 // one node (from Rd), ORR is better since it has higher throughput and
3476 // smaller latency than BFM on many AArch64 processors (and for the rest
3477 // ORR is at least as good as BFM).
3478 SDValue ShiftedOperand;
3479 uint64_t EncodedShiftImm;
3480 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3481 EncodedShiftImm)) {
3482 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3483 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3484 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3485 return true;
3486 }
3487 }
3488 return false;
3489 }
3490
3491 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3492
3493 uint64_t ShlImm;
3494 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3495 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3496 SDValue Ops[] = {
3497 Dst, Src,
3498 CurDAG->getTargetConstant(
3499 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3500 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3501 return true;
3502 }
3503
3504 // Select the following pattern to left-shifted operand rather than BFI.
3505 // %val1 = op ..
3506 // %val2 = shl %val1, #imm
3507 // %res = or %val1, %val2
3508 //
3509 // If N is selected to be BFI, we know that
3510 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3511 // into BFI); 2) OrOpd1 would be the destination operand (i.e., preserved)
3512 //
3513 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3514 if (OrOpd0.getOperand(0) == OrOpd1) {
3515 SDValue Ops[] = {
3516 OrOpd1, OrOpd1,
3517 CurDAG->getTargetConstant(
3518 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3519 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3520 return true;
3521 }
3522 }
3523
3524 uint64_t SrlImm;
3525 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3526 // Select the following pattern to right-shifted operand rather than BFXIL.
3527 // %val1 = op ..
3528 // %val2 = lshr %val1, #imm
3529 // %res = or %val1, %val2
3530 //
3531 // If N is selected to be BFXIL, we know that
3532 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3533 // into BFXIL); 2) OrOpd1 would be the destination operand (i.e., preserved)
3534 //
3535 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3536 if (OrOpd0.getOperand(0) == OrOpd1) {
3537 SDValue Ops[] = {
3538 OrOpd1, OrOpd1,
3539 CurDAG->getTargetConstant(
3540 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3541 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3542 return true;
3543 }
3544 }
3545
3546 return false;
3547}
3548
3549static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3550 SelectionDAG *CurDAG) {
3551 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3552
3553 EVT VT = N->getValueType(0);
3554 if (VT != MVT::i32 && VT != MVT::i64)
3555 return false;
3556
3557 unsigned BitWidth = VT.getSizeInBits();
3558
3559 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3560 // have the expected shape. Try to undo that.
3561
3562 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3563 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3564
3565 // Given a OR operation, check if we have the following pattern
3566 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3567 // isBitfieldExtractOp)
3568 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3569 // countTrailingZeros(mask2) == imm2 - imm + 1
3570 // f = d | c
3571 // if yes, replace the OR instruction with:
3572 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3573
3574 // OR is commutative, check all combinations of operand order and values of
3575 // BiggerPattern, i.e.
3576 // Opd0, Opd1, BiggerPattern=false
3577 // Opd1, Opd0, BiggerPattern=false
3578 // Opd0, Opd1, BiggerPattern=true
3579 // Opd1, Opd0, BiggerPattern=true
3580 // Several of these combinations may match, so check with BiggerPattern=false
3581 // first since that will produce better results by matching more instructions
3582 // and/or inserting fewer extra instructions.
3583 for (int I = 0; I < 4; ++I) {
3584
3585 SDValue Dst, Src;
3586 unsigned ImmR, ImmS;
3587 bool BiggerPattern = I / 2;
3588 SDValue OrOpd0Val = N->getOperand(I % 2);
3589 SDNode *OrOpd0 = OrOpd0Val.getNode();
3590 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3591 SDNode *OrOpd1 = OrOpd1Val.getNode();
3592
3593 unsigned BFXOpc;
3594 int DstLSB, Width;
3595 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3596 NumberOfIgnoredLowBits, BiggerPattern)) {
3597 // Check that the returned opcode is compatible with the pattern,
3598 // i.e., same type and zero extended (U and not S)
3599 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3600 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3601 continue;
3602
3603 // Compute the width of the bitfield insertion
3604 DstLSB = 0;
3605 Width = ImmS - ImmR + 1;
3606 // FIXME: This constraint is to catch bitfield insertion; we may
3607 // want to widen the pattern if we want to grab the general bitfield
3608 // move case.
3609 if (Width <= 0)
3610 continue;
3611
3612 // If the mask on the insertee is correct, we have a BFXIL operation. We
3613 // can share the ImmR and ImmS values from the already-computed UBFM.
3614 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3615 BiggerPattern,
3616 Src, DstLSB, Width)) {
3617 ImmR = (BitWidth - DstLSB) % BitWidth;
3618 ImmS = Width - 1;
3619 } else
3620 continue;
3621
3622 // Check the second part of the pattern
3623 EVT VT = OrOpd1Val.getValueType();
3624 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3625
3626 // Compute the Known Zero for the candidate of the first operand.
3627 // This allows us to catch more general cases than just looking for
3628 // AND with imm. Indeed, simplify-demanded-bits may have removed
3629 // the AND instruction because it proved it was useless.
3630 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3631
3632 // Check if there is enough room for the second operand to appear
3633 // in the first one
3634 APInt BitsToBeInserted =
3635 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3636
3637 if ((BitsToBeInserted & ~Known.Zero) != 0)
3638 continue;
3639
3640 // Set the first operand
3641 uint64_t Imm;
3642 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3643 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3644 // In that case, we can eliminate the AND
3645 Dst = OrOpd1->getOperand(0);
3646 else
3647 // Maybe the AND has been removed by simplify-demanded-bits
3648 // or is useful because it discards more bits
3649 Dst = OrOpd1Val;
3650
3651 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3652 // with shifted operand is more efficient.
3653 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3654 BiggerPattern))
3655 return true;
3656
3657 // both parts match
3658 SDLoc DL(N);
3659 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3660 CurDAG->getTargetConstant(ImmS, DL, VT)};
3661 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3662 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3663 return true;
3664 }
3665
3666 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3667 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3668 // mask (e.g., 0x000ffff0).
3669 uint64_t Mask0Imm, Mask1Imm;
3670 SDValue And0 = N->getOperand(0);
3671 SDValue And1 = N->getOperand(1);
3672 if (And0.hasOneUse() && And1.hasOneUse() &&
3673 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3674 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3675 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3676 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3677
3678 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3679 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3680 // bits to be inserted.
3681 if (isShiftedMask(Mask0Imm, VT)) {
3682 std::swap(And0, And1);
3683 std::swap(Mask0Imm, Mask1Imm);
3684 }
3685
3686 SDValue Src = And1->getOperand(0);
3687 SDValue Dst = And0->getOperand(0);
3688 unsigned LSB = llvm::countr_zero(Mask1Imm);
3689 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3690
3691 // The BFXIL inserts the low-order bits from a source register, so right
3692 // shift the needed bits into place.
3693 SDLoc DL(N);
3694 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3695 uint64_t LsrImm = LSB;
3696 if (Src->hasOneUse() &&
3697 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3698 (LsrImm + LSB) < BitWidth) {
3699 Src = Src->getOperand(0);
3700 LsrImm += LSB;
3701 }
3702
3703 SDNode *LSR = CurDAG->getMachineNode(
3704 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3705 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3706
3707 // BFXIL is an alias of BFM, so translate to BFM operands.
3708 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3709 unsigned ImmS = Width - 1;
3710
3711 // Create the BFXIL instruction.
3712 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3713 CurDAG->getTargetConstant(ImmR, DL, VT),
3714 CurDAG->getTargetConstant(ImmS, DL, VT)};
3715 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3716 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3717 return true;
3718 }
3719
3720 return false;
3721}
3722
3723bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3724 if (N->getOpcode() != ISD::OR)
3725 return false;
3726
3727 APInt NUsefulBits;
3728 getUsefulBits(SDValue(N, 0), NUsefulBits);
3729
3730 // If none of the bits are useful, just return UNDEF.
3731 if (!NUsefulBits) {
3732 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3733 return true;
3734 }
3735
3736 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3737 return true;
3738
3739 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3740}
3741
3742 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3743/// equivalent of a left shift by a constant amount followed by an and masking
3744/// out a contiguous set of bits.
3745bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3746 if (N->getOpcode() != ISD::AND)
3747 return false;
3748
3749 EVT VT = N->getValueType(0);
3750 if (VT != MVT::i32 && VT != MVT::i64)
3751 return false;
3752
3753 SDValue Op0;
3754 int DstLSB, Width;
3755 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3756 Op0, DstLSB, Width))
3757 return false;
3758
3759 // ImmR is the rotate right amount.
3760 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3761 // ImmS is the most significant bit of the source to be moved.
3762 unsigned ImmS = Width - 1;
3763
3764 SDLoc DL(N);
3765 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3766 CurDAG->getTargetConstant(ImmS, DL, VT)};
3767 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3768 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3769 return true;
3770}
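// Illustrative worked example (not part of the original source): for
// N = (and (shl x, 3), 0x1F8) on i32, isBitfieldPositioningOp yields
// DstLSB = 3 and Width = 6, so ImmR = (32 - 3) % 32 = 29 and ImmS = 5, and the
// node is selected as UBFMWri, i.e. "ubfiz wD, wX, #3, #6" (register names are
// placeholders).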
3771
3772/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3773/// variable shift/rotate instructions.
3774bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3775 EVT VT = N->getValueType(0);
3776
3777 unsigned Opc;
3778 switch (N->getOpcode()) {
3779 case ISD::ROTR:
3780 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3781 break;
3782 case ISD::SHL:
3783 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3784 break;
3785 case ISD::SRL:
3786 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3787 break;
3788 case ISD::SRA:
3789 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3790 break;
3791 default:
3792 return false;
3793 }
3794
3795 uint64_t Size;
3796 uint64_t Bits;
3797 if (VT == MVT::i32) {
3798 Bits = 5;
3799 Size = 32;
3800 } else if (VT == MVT::i64) {
3801 Bits = 6;
3802 Size = 64;
3803 } else
3804 return false;
3805
3806 SDValue ShiftAmt = N->getOperand(1);
3807 SDLoc DL(N);
3808 SDValue NewShiftAmt;
3809
3810 // Skip over an extend of the shift amount.
3811 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3812 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3813 ShiftAmt = ShiftAmt->getOperand(0);
3814
3815 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3816 SDValue Add0 = ShiftAmt->getOperand(0);
3817 SDValue Add1 = ShiftAmt->getOperand(1);
3818 uint64_t Add0Imm;
3819 uint64_t Add1Imm;
3820 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3821 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3822 // to avoid the ADD/SUB.
3823 NewShiftAmt = Add0;
3824 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3825 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3826 (Add0Imm % Size == 0)) {
3827 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3828 // to generate a NEG instead of a SUB from a constant.
3829 unsigned NegOpc;
3830 unsigned ZeroReg;
3831 EVT SubVT = ShiftAmt->getValueType(0);
3832 if (SubVT == MVT::i32) {
3833 NegOpc = AArch64::SUBWrr;
3834 ZeroReg = AArch64::WZR;
3835 } else {
3836 assert(SubVT == MVT::i64);
3837 NegOpc = AArch64::SUBXrr;
3838 ZeroReg = AArch64::XZR;
3839 }
3840 SDValue Zero =
3841 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3842 MachineSDNode *Neg =
3843 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3844 NewShiftAmt = SDValue(Neg, 0);
3845 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3846 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3847 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3848 // to generate a NOT instead of a SUB from a constant.
3849 unsigned NotOpc;
3850 unsigned ZeroReg;
3851 EVT SubVT = ShiftAmt->getValueType(0);
3852 if (SubVT == MVT::i32) {
3853 NotOpc = AArch64::ORNWrr;
3854 ZeroReg = AArch64::WZR;
3855 } else {
3856 assert(SubVT == MVT::i64);
3857 NotOpc = AArch64::ORNXrr;
3858 ZeroReg = AArch64::XZR;
3859 }
3860 SDValue Zero =
3861 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3862 MachineSDNode *Not =
3863 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3864 NewShiftAmt = SDValue(Not, 0);
3865 } else
3866 return false;
3867 } else {
3868 // If the shift amount is masked with an AND, check that the mask covers the
3869 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3870 // the AND.
3871 uint64_t MaskImm;
3872 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3873 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3874 return false;
3875
3876 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3877 return false;
3878
3879 NewShiftAmt = ShiftAmt->getOperand(0);
3880 }
3881
3882 // Narrow/widen the shift amount to match the size of the shift operation.
3883 if (VT == MVT::i32)
3884 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3885 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3886 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3887 MachineSDNode *Ext = CurDAG->getMachineNode(
3888 AArch64::SUBREG_TO_REG, DL, VT,
3889 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3890 NewShiftAmt = SDValue(Ext, 0);
3891 }
3892
3893 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3894 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3895 return true;
3896}
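// Illustrative worked examples (not part of the original source; register
// names are placeholders), for a 64-bit shift where the hardware already takes
// the amount modulo 64:
//   (srl x, (add y, 64)) -> lsrv xD, xX, xY            ; the +64 is dropped
//   (srl x, (sub 64, y)) -> neg x8, xY ; lsrv xD, xX, x8  (SUBXrr XZR, y)
//   (srl x, (and y, 63)) -> lsrv xD, xX, xY            ; the mask covers all 6 bits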
3897
3898 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3899 SDValue &FixedPos,
3900 unsigned RegWidth,
3901 bool isReciprocal) {
3902 APFloat FVal(0.0);
3903 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3904 FVal = CN->getValueAPF();
3905 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3906 // Some otherwise illegal constants are allowed in this case.
3907 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3908 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3909 return false;
3910
3911 ConstantPoolSDNode *CN =
3912 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3913 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3914 } else
3915 return false;
3916
3917 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3918 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3919 // x-register.
3920 //
3921 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3922 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3923 // integers.
3924 bool IsExact;
3925
3926 if (isReciprocal)
3927 if (!FVal.getExactInverse(&FVal))
3928 return false;
3929
3930 // fbits is between 1 and 64 in the worst-case, which means the fmul
3931 // could have 2^64 as an actual operand. Need 65 bits of precision.
3932 APSInt IntVal(65, true);
3933 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3934
3935 // N.b. isPowerOf2 also checks for > 0.
3936 if (!IsExact || !IntVal.isPowerOf2())
3937 return false;
3938 unsigned FBits = IntVal.logBase2();
3939
3940 // Checks above should have guaranteed that we haven't lost information in
3941 // finding FBits, but it must still be in range.
3942 if (FBits == 0 || FBits > RegWidth) return false;
3943
3944 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3945 return true;
3946}
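// Illustrative worked example (not part of the original source): for
// (fp_to_sint (fmul x, 256.0)) with a 32-bit result, FVal = 256.0 converts
// exactly to IntVal = 256, a power of two, so FBits = 8 (0 < 8 <= 32) and the
// multiply can be folded into a fixed-point convert such as
// "fcvtzs w0, s0, #8" (register names are placeholders). With isReciprocal,
// an operand of 1/256.0 is first inverted to 256.0 and yields the same FBits.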
3947
3948bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3949 unsigned RegWidth) {
3950 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3951 false);
3952}
3953
3954bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3955 SDValue &FixedPos,
3956 unsigned RegWidth) {
3957 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3958 true);
3959}
3960
3961 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3962 // of the string, obtains the integer values from them, and combines these
3963 // into a single value to be used in the MRS/MSR instruction.
3964 static int getIntOperandFromRegisterString(StringRef RegString) {
3965 SmallVector<StringRef, 5> Fields;
3966 RegString.split(Fields, ':');
3967
3968 if (Fields.size() == 1)
3969 return -1;
3970
3971 assert(Fields.size() == 5
3972 && "Invalid number of fields in read register string");
3973
3974 SmallVector<int, 5> Ops;
3975 bool AllIntFields = true;
3976
3977 for (StringRef Field : Fields) {
3978 unsigned IntField;
3979 AllIntFields &= !Field.getAsInteger(10, IntField);
3980 Ops.push_back(IntField);
3981 }
3982
3983 assert(AllIntFields &&
3984 "Unexpected non-integer value in special register string.");
3985 (void)AllIntFields;
3986
3987 // Need to combine the integer fields of the string into a single value
3988 // based on the bit encoding of the MRS/MSR instruction.
3989 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3990 (Ops[3] << 3) | (Ops[4]);
3991}
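// Illustrative worked example (not part of the original source): the string
// "3:3:13:0:2" (the o0:op1:CRn:CRm:op2 form of TPIDR_EL0) yields
// (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xDE82.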
3992
3993// Lower the read_register intrinsic to an MRS instruction node if the special
3994 // register string argument is either of the form detailed in the ACLE (the
3995 // form described in getIntOperandFromRegisterString) or is a named register
3996// known by the MRS SysReg mapper.
3997bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3998 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3999 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4000 SDLoc DL(N);
4001
4002 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4003
4004 unsigned Opcode64Bit = AArch64::MRS;
4005 int Imm = getIntOperandFromRegisterString(RegString->getString());
4006 if (Imm == -1) {
4007 // No match. Use the sysreg mapper to map the remaining possible strings to
4008 // the value for the register to be used for the instruction operand.
4009 const auto *TheReg =
4010 AArch64SysReg::lookupSysRegByName(RegString->getString());
4011 if (TheReg && TheReg->Readable &&
4012 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4013 Imm = TheReg->Encoding;
4014 else
4015 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4016
4017 if (Imm == -1) {
4018 // Still no match, see if this is "pc" or give up.
4019 if (!ReadIs128Bit && RegString->getString() == "pc") {
4020 Opcode64Bit = AArch64::ADR;
4021 Imm = 0;
4022 } else {
4023 return false;
4024 }
4025 }
4026 }
4027
4028 SDValue InChain = N->getOperand(0);
4029 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4030 if (!ReadIs128Bit) {
4031 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4032 {SysRegImm, InChain});
4033 } else {
4034 SDNode *MRRS = CurDAG->getMachineNode(
4035 AArch64::MRRS, DL,
4036 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4037 {SysRegImm, InChain});
4038
4039 // Sysregs are not endian. The even register always contains the low half
4040 // of the register.
4041 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4042 SDValue(MRRS, 0));
4043 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4044 SDValue(MRRS, 0));
4045 SDValue OutChain = SDValue(MRRS, 1);
4046
4047 ReplaceUses(SDValue(N, 0), Lo);
4048 ReplaceUses(SDValue(N, 1), Hi);
4049 ReplaceUses(SDValue(N, 2), OutChain);
4050 };
4051 return true;
4052}
4053
4054// Lower the write_register intrinsic to an MSR instruction node if the special
4055 // register string argument is either of the form detailed in the ACLE (the
4056 // form described in getIntOperandFromRegisterString) or is a named register
4057// known by the MSR SysReg mapper.
4058bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4059 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4060 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4061 SDLoc DL(N);
4062
4063 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4064
4065 if (!WriteIs128Bit) {
4066 // Check if the register was one of those allowed as the pstatefield value
4067 // in the MSR (immediate) instruction. To accept the values allowed in the
4068 // pstatefield for the MSR (immediate) instruction, we also require that an
4069 // immediate value has been provided as an argument; we know that this is
4070 // the case, as it has been ensured by semantic checking.
4071 auto trySelectPState = [&](auto PMapper, unsigned State) {
4072 if (PMapper) {
4073 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4074 "Expected a constant integer expression.");
4075 unsigned Reg = PMapper->Encoding;
4076 uint64_t Immed = N->getConstantOperandVal(2);
4077 CurDAG->SelectNodeTo(
4078 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4079 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4080 return true;
4081 }
4082 return false;
4083 };
4084
4085 if (trySelectPState(
4086 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4087 AArch64::MSRpstateImm4))
4088 return true;
4089 if (trySelectPState(
4090 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4091 AArch64::MSRpstateImm1))
4092 return true;
4093 }
4094
4095 int Imm = getIntOperandFromRegisterString(RegString->getString());
4096 if (Imm == -1) {
4097 // Use the sysreg mapper to attempt to map the remaining possible strings
4098 // to the value for the register to be used for the MSR (register)
4099 // instruction operand.
4100 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4101 if (TheReg && TheReg->Writeable &&
4102 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4103 Imm = TheReg->Encoding;
4104 else
4105 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4106
4107 if (Imm == -1)
4108 return false;
4109 }
4110
4111 SDValue InChain = N->getOperand(0);
4112 if (!WriteIs128Bit) {
4113 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4114 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4115 N->getOperand(2), InChain);
4116 } else {
4117 // No endian swap. The lower half always goes into the even subreg, and the
4118 // higher half always into the odd subreg.
4119 SDNode *Pair = CurDAG->getMachineNode(
4120 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4121 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4122 MVT::i32),
4123 N->getOperand(2),
4124 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4125 N->getOperand(3),
4126 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4127
4128 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4129 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4130 SDValue(Pair, 0), InChain);
4131 }
4132
4133 return true;
4134}
4135
4136/// We've got special pseudo-instructions for these
4137bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4138 unsigned Opcode;
4139 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4140
4141 // Leave IR for LSE if subtarget supports it.
4142 if (Subtarget->hasLSE()) return false;
4143
4144 if (MemTy == MVT::i8)
4145 Opcode = AArch64::CMP_SWAP_8;
4146 else if (MemTy == MVT::i16)
4147 Opcode = AArch64::CMP_SWAP_16;
4148 else if (MemTy == MVT::i32)
4149 Opcode = AArch64::CMP_SWAP_32;
4150 else if (MemTy == MVT::i64)
4151 Opcode = AArch64::CMP_SWAP_64;
4152 else
4153 llvm_unreachable("Unknown AtomicCmpSwap type");
4154
4155 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4156 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4157 N->getOperand(0)};
4158 SDNode *CmpSwap = CurDAG->getMachineNode(
4159 Opcode, SDLoc(N),
4160 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4161
4162 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4163 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4164
4165 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4166 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4167 CurDAG->RemoveDeadNode(N);
4168
4169 return true;
4170}
4171
4172bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4173 SDValue &Shift) {
4174 if (!isa<ConstantSDNode>(N))
4175 return false;
4176
4177 SDLoc DL(N);
4178 uint64_t Val = cast<ConstantSDNode>(N)
4179 ->getAPIntValue()
4180 .trunc(VT.getFixedSizeInBits())
4181 .getZExtValue();
4182
4183 switch (VT.SimpleTy) {
4184 case MVT::i8:
4185 // All immediates are supported.
4186 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4187 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4188 return true;
4189 case MVT::i16:
4190 case MVT::i32:
4191 case MVT::i64:
4192 // Support 8bit unsigned immediates.
4193 if (Val <= 255) {
4194 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4195 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4196 return true;
4197 }
4198 // Support 16bit unsigned immediates that are a multiple of 256.
4199 if (Val <= 65280 && Val % 256 == 0) {
4200 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4201 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4202 return true;
4203 }
4204 break;
4205 default:
4206 break;
4207 }
4208
4209 return false;
4210}
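// Illustrative worked example (not part of the original source): for VT = i16
// and Val = 0x1200, the value is not an 8-bit immediate but is a multiple of
// 256 that is <= 65280, so Shift = 8 and Imm = 0x12, matching the
// "#imm, lsl #8" form of the SVE add/sub immediate (e.g.
// "add z0.h, z0.h, #18, lsl #8"; register names are placeholders).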
4211
4212bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4213 SDValue &Imm, SDValue &Shift,
4214 bool Negate) {
4215 if (!isa<ConstantSDNode>(N))
4216 return false;
4217
4218 SDLoc DL(N);
4219 int64_t Val = cast<ConstantSDNode>(N)
4220 ->getAPIntValue()
4221 .trunc(VT.getFixedSizeInBits())
4222 .getSExtValue();
4223
4224 if (Negate)
4225 Val = -Val;
4226
4227 // Signed saturating instructions treat their immediate operand as unsigned,
4228 // whereas the related intrinsics define their operands to be signed. This
4229 // means we can only use the immediate form when the operand is non-negative.
4230 if (Val < 0)
4231 return false;
4232
4233 switch (VT.SimpleTy) {
4234 case MVT::i8:
4235 // All positive immediates are supported.
4236 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4237 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4238 return true;
4239 case MVT::i16:
4240 case MVT::i32:
4241 case MVT::i64:
4242 // Support 8bit positive immediates.
4243 if (Val <= 255) {
4244 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4245 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4246 return true;
4247 }
4248 // Support 16bit positive immediates that are a multiple of 256.
4249 if (Val <= 65280 && Val % 256 == 0) {
4250 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4251 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4252 return true;
4253 }
4254 break;
4255 default:
4256 break;
4257 }
4258
4259 return false;
4260}
4261
4262bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4263 SDValue &Shift) {
4264 if (!isa<ConstantSDNode>(N))
4265 return false;
4266
4267 SDLoc DL(N);
4268 int64_t Val = cast<ConstantSDNode>(N)
4269 ->getAPIntValue()
4270 .trunc(VT.getFixedSizeInBits())
4271 .getSExtValue();
4272
4273 switch (VT.SimpleTy) {
4274 case MVT::i8:
4275 // All immediates are supported.
4276 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4277 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4278 return true;
4279 case MVT::i16:
4280 case MVT::i32:
4281 case MVT::i64:
4282 // Support 8bit signed immediates.
4283 if (Val >= -128 && Val <= 127) {
4284 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4285 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4286 return true;
4287 }
4288 // Support 16bit signed immediates that are a multiple of 256.
4289 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4290 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4291 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4292 return true;
4293 }
4294 break;
4295 default:
4296 break;
4297 }
4298
4299 return false;
4300}
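// Illustrative worked example (not part of the original source): for VT = i32
// and Val = -1024, the value is a multiple of 256 within [-32768, 32512], so
// Shift = 8 and Imm = ((-1024 >> 8) & 0xFF) = 0xFC, i.e. the CPY/DUP encodes
// the immediate as #-4, lsl #8.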
4301
4302bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4303 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4304 int64_t ImmVal = CNode->getSExtValue();
4305 SDLoc DL(N);
4306 if (ImmVal >= -128 && ImmVal < 128) {
4307 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4308 return true;
4309 }
4310 }
4311 return false;
4312}
4313
4314bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4315 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4316 uint64_t ImmVal = CNode->getZExtValue();
4317
4318 switch (VT.SimpleTy) {
4319 case MVT::i8:
4320 ImmVal &= 0xFF;
4321 break;
4322 case MVT::i16:
4323 ImmVal &= 0xFFFF;
4324 break;
4325 case MVT::i32:
4326 ImmVal &= 0xFFFFFFFF;
4327 break;
4328 case MVT::i64:
4329 break;
4330 default:
4331 llvm_unreachable("Unexpected type");
4332 }
4333
4334 if (ImmVal < 256) {
4335 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4336 return true;
4337 }
4338 }
4339 return false;
4340}
4341
4342bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4343 bool Invert) {
4344 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4345 uint64_t ImmVal = CNode->getZExtValue();
4346 SDLoc DL(N);
4347
4348 if (Invert)
4349 ImmVal = ~ImmVal;
4350
4351 // Shift mask depending on type size.
4352 switch (VT.SimpleTy) {
4353 case MVT::i8:
4354 ImmVal &= 0xFF;
4355 ImmVal |= ImmVal << 8;
4356 ImmVal |= ImmVal << 16;
4357 ImmVal |= ImmVal << 32;
4358 break;
4359 case MVT::i16:
4360 ImmVal &= 0xFFFF;
4361 ImmVal |= ImmVal << 16;
4362 ImmVal |= ImmVal << 32;
4363 break;
4364 case MVT::i32:
4365 ImmVal &= 0xFFFFFFFF;
4366 ImmVal |= ImmVal << 32;
4367 break;
4368 case MVT::i64:
4369 break;
4370 default:
4371 llvm_unreachable("Unexpected type");
4372 }
4373
4374 uint64_t encoding;
4375 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4376 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4377 return true;
4378 }
4379 }
4380 return false;
4381}
4382
4383// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4384// Rather than attempt to normalise everything we can sometimes saturate the
4385// shift amount during selection. This function also allows for consistent
4386// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4387// required by the instructions.
4388bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4389 uint64_t High, bool AllowSaturation,
4390 SDValue &Imm) {
4391 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4392 uint64_t ImmVal = CN->getZExtValue();
4393
4394 // Reject shift amounts that are too small.
4395 if (ImmVal < Low)
4396 return false;
4397
4398 // Reject or saturate shift amounts that are too big.
4399 if (ImmVal > High) {
4400 if (!AllowSaturation)
4401 return false;
4402 ImmVal = High;
4403 }
4404
4405 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4406 return true;
4407 }
4408
4409 return false;
4410}
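// Illustrative worked example (not part of the original source), assuming
// Low = 1 and High = 8 (a right shift on byte elements): an intrinsic shift
// amount of 200 is clamped to 8 when AllowSaturation is true, and rejected
// when AllowSaturation is false.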
4411
4412bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4413 // tagp(FrameIndex, IRGstack, tag_offset):
4414 // since the offset between FrameIndex and IRGstack is a compile-time
4415 // constant, this can be lowered to a single ADDG instruction.
4416 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4417 return false;
4418 }
4419
4420 SDValue IRG_SP = N->getOperand(2);
4421 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4422 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4423 return false;
4424 }
4425
4426 const TargetLowering *TLI = getTargetLowering();
4427 SDLoc DL(N);
4428 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4429 SDValue FiOp = CurDAG->getTargetFrameIndex(
4430 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4431 int TagOffset = N->getConstantOperandVal(3);
4432
4433 SDNode *Out = CurDAG->getMachineNode(
4434 AArch64::TAGPstack, DL, MVT::i64,
4435 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4436 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4437 ReplaceNode(N, Out);
4438 return true;
4439}
4440
4441void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4442 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4443 "llvm.aarch64.tagp third argument must be an immediate");
4444 if (trySelectStackSlotTagP(N))
4445 return;
4446 // FIXME: the above applies in any case when the offset between Op1 and Op2 is a
4447 // compile-time constant, not just for stack allocations.
4448
4449 // General case for unrelated pointers in Op1 and Op2.
4450 SDLoc DL(N);
4451 int TagOffset = N->getConstantOperandVal(3);
4452 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4453 {N->getOperand(1), N->getOperand(2)});
4454 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4455 {SDValue(N1, 0), N->getOperand(2)});
4456 SDNode *N3 = CurDAG->getMachineNode(
4457 AArch64::ADDG, DL, MVT::i64,
4458 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4459 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4460 ReplaceNode(N, N3);
4461}
4462
4463bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4464 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4465
4466 // Bail when not a "cast" like insert_subvector.
4467 if (N->getConstantOperandVal(2) != 0)
4468 return false;
4469 if (!N->getOperand(0).isUndef())
4470 return false;
4471
4472 // Bail when normal isel should do the job.
4473 EVT VT = N->getValueType(0);
4474 EVT InVT = N->getOperand(1).getValueType();
4475 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4476 return false;
4477 if (InVT.getSizeInBits() <= 128)
4478 return false;
4479
4480 // NOTE: We can only get here when doing fixed length SVE code generation.
4481 // We do manual selection because the types involved are not linked to real
4482 // registers (despite being legal) and must be coerced into SVE registers.
4483
4484 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4485 "Expected to insert into a packed scalable vector!");
4486
4487 SDLoc DL(N);
4488 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4489 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4490 N->getOperand(1), RC));
4491 return true;
4492}
4493
4494bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4495 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4496
4497 // Bail when not a "cast" like extract_subvector.
4498 if (N->getConstantOperandVal(1) != 0)
4499 return false;
4500
4501 // Bail when normal isel can do the job.
4502 EVT VT = N->getValueType(0);
4503 EVT InVT = N->getOperand(0).getValueType();
4504 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4505 return false;
4506 if (VT.getSizeInBits() <= 128)
4507 return false;
4508
4509 // NOTE: We can only get here when doing fixed length SVE code generation.
4510 // We do manual selection because the types involved are not linked to real
4511 // registers (despite being legal) and must be coerced into SVE registers.
4512
4513 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4514 "Expected to extract from a packed scalable vector!");
4515
4516 SDLoc DL(N);
4517 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4518 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4519 N->getOperand(0), RC));
4520 return true;
4521}
4522
4523bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4524 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4525
4526 SDValue N0 = N->getOperand(0);
4527 SDValue N1 = N->getOperand(1);
4528 EVT VT = N->getValueType(0);
4529
4530 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4531 // Rotate by a constant is a funnel shift in IR, which is expanded to
4532 // an OR with shifted operands.
4533 // We do the following transform:
4534 // OR N0, N1 -> xar (x, y, imm)
4535 // Where:
4536 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4537 // N0 = SHL_PRED true, V, splat(bits-imm)
4538 // V = (xor x, y)
4539 if (VT.isScalableVector() &&
4540 (Subtarget->hasSVE2() ||
4541 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4542 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4543 N1.getOpcode() != AArch64ISD::SRL_PRED)
4544 std::swap(N0, N1);
4545 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4546 N1.getOpcode() != AArch64ISD::SRL_PRED)
4547 return false;
4548
4549 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4550 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4551 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4552 return false;
4553
4554 SDValue XOR = N0.getOperand(1);
4555 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4556 return false;
4557
4558 APInt ShlAmt, ShrAmt;
4559 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4560 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4561 return false;
4562
4563 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4564 return false;
4565
4566 SDLoc DL(N);
4567 SDValue Imm =
4568 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4569
4570 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4571 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4572 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4573 AArch64::XAR_ZZZI_D})) {
4574 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4575 return true;
4576 }
4577 return false;
4578 }
4579
4580 if (!Subtarget->hasSHA3())
4581 return false;
4582
4583 if (N0->getOpcode() != AArch64ISD::VSHL ||
4584 N1->getOpcode() != AArch64ISD::VLSHR)
4585 return false;
4586
4587 if (N0->getOperand(0) != N1->getOperand(0) ||
4588 N1->getOperand(0)->getOpcode() != ISD::XOR)
4589 return false;
4590
4591 SDValue XOR = N0.getOperand(0);
4592 SDValue R1 = XOR.getOperand(0);
4593 SDValue R2 = XOR.getOperand(1);
4594
4595 unsigned HsAmt = N0.getConstantOperandVal(1);
4596 unsigned ShAmt = N1.getConstantOperandVal(1);
4597
4598 SDLoc DL = SDLoc(N0.getOperand(1));
4599 SDValue Imm = CurDAG->getTargetConstant(
4600 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4601
4602 if (ShAmt + HsAmt != 64)
4603 return false;
4604
4605 SDValue Ops[] = {R1, R2, Imm};
4606 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4607
4608 return true;
4609}
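// Illustrative worked example (not part of the original source), for the
// Neon/SHA3 path above: with N0 = (AArch64ISD::VSHL (xor x, y), 40) and
// N1 = (AArch64ISD::VLSHR (xor x, y), 24) on v2i64, HsAmt + ShAmt = 64, so the
// OR is selected as "xar v0.2d, v1.2d, v2.2d, #24", i.e. a rotate right of
// (x ^ y) by 24 (register names are placeholders).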
4610
4611void AArch64DAGToDAGISel::Select(SDNode *Node) {
4612 // If we have a custom node, we already have selected!
4613 if (Node->isMachineOpcode()) {
4614 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4615 Node->setNodeId(-1);
4616 return;
4617 }
4618
4619 // A few custom selection cases.
4620 EVT VT = Node->getValueType(0);
4621
4622 switch (Node->getOpcode()) {
4623 default:
4624 break;
4625
4626 case ISD::ATOMIC_CMP_SWAP:
4627 if (SelectCMP_SWAP(Node))
4628 return;
4629 break;
4630
4631 case ISD::READ_REGISTER:
4632 case AArch64ISD::MRRS:
4633 if (tryReadRegister(Node))
4634 return;
4635 break;
4636
4637 case ISD::WRITE_REGISTER:
4638 case AArch64ISD::MSRR:
4639 if (tryWriteRegister(Node))
4640 return;
4641 break;
4642
4643 case ISD::LOAD: {
4644 // Try to select as an indexed load. Fall through to normal processing
4645 // if we can't.
4646 if (tryIndexedLoad(Node))
4647 return;
4648 break;
4649 }
4650
4651 case ISD::SRL:
4652 case ISD::AND:
4653 case ISD::SRA:
4654 case ISD::SIGN_EXTEND_INREG:
4655 if (tryBitfieldExtractOp(Node))
4656 return;
4657 if (tryBitfieldInsertInZeroOp(Node))
4658 return;
4659 [[fallthrough]];
4660 case ISD::ROTR:
4661 case ISD::SHL:
4662 if (tryShiftAmountMod(Node))
4663 return;
4664 break;
4665
4666 case ISD::SIGN_EXTEND:
4667 if (tryBitfieldExtractOpFromSExt(Node))
4668 return;
4669 break;
4670
4671 case ISD::OR:
4672 if (tryBitfieldInsertOp(Node))
4673 return;
4674 if (trySelectXAR(Node))
4675 return;
4676 break;
4677
4678 case ISD::EXTRACT_SUBVECTOR: {
4679 if (trySelectCastScalableToFixedLengthVector(Node))
4680 return;
4681 break;
4682 }
4683
4684 case ISD::INSERT_SUBVECTOR: {
4685 if (trySelectCastFixedLengthToScalableVector(Node))
4686 return;
4687 break;
4688 }
4689
4690 case ISD::Constant: {
4691 // Materialize zero constants as copies from WZR/XZR. This allows
4692 // the coalescer to propagate these into other instructions.
4693 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4694 if (ConstNode->isZero()) {
4695 if (VT == MVT::i32) {
4696 SDValue New = CurDAG->getCopyFromReg(
4697 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4698 ReplaceNode(Node, New.getNode());
4699 return;
4700 } else if (VT == MVT::i64) {
4701 SDValue New = CurDAG->getCopyFromReg(
4702 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4703 ReplaceNode(Node, New.getNode());
4704 return;
4705 }
4706 }
4707 break;
4708 }
4709
4710 case ISD::FrameIndex: {
4711 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4712 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4713 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4714 const TargetLowering *TLI = getTargetLowering();
4715 SDValue TFI = CurDAG->getTargetFrameIndex(
4716 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4717 SDLoc DL(Node);
4718 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4719 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4720 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4721 return;
4722 }
4723 case ISD::INTRINSIC_W_CHAIN: {
4724 unsigned IntNo = Node->getConstantOperandVal(1);
4725 switch (IntNo) {
4726 default:
4727 break;
4728 case Intrinsic::aarch64_gcsss: {
4729 SDLoc DL(Node);
4730 SDValue Chain = Node->getOperand(0);
4731 SDValue Val = Node->getOperand(2);
4732 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4733 SDNode *SS1 =
4734 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4735 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4736 MVT::Other, Zero, SDValue(SS1, 0));
4737 ReplaceNode(Node, SS2);
4738 return;
4739 }
4740 case Intrinsic::aarch64_ldaxp:
4741 case Intrinsic::aarch64_ldxp: {
4742 unsigned Op =
4743 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4744 SDValue MemAddr = Node->getOperand(2);
4745 SDLoc DL(Node);
4746 SDValue Chain = Node->getOperand(0);
4747
4748 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4749 MVT::Other, MemAddr, Chain);
4750
4751 // Transfer memoperands.
4752 MachineMemOperand *MemOp =
4753 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4754 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4755 ReplaceNode(Node, Ld);
4756 return;
4757 }
4758 case Intrinsic::aarch64_stlxp:
4759 case Intrinsic::aarch64_stxp: {
4760 unsigned Op =
4761 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4762 SDLoc DL(Node);
4763 SDValue Chain = Node->getOperand(0);
4764 SDValue ValLo = Node->getOperand(2);
4765 SDValue ValHi = Node->getOperand(3);
4766 SDValue MemAddr = Node->getOperand(4);
4767
4768 // Place arguments in the right order.
4769 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4770
4771 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4772 // Transfer memoperands.
4773 MachineMemOperand *MemOp =
4774 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4775 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4776
4777 ReplaceNode(Node, St);
4778 return;
4779 }
4780 case Intrinsic::aarch64_neon_ld1x2:
4781 if (VT == MVT::v8i8) {
4782 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4783 return;
4784 } else if (VT == MVT::v16i8) {
4785 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4786 return;
4787 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4788 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4789 return;
4790 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4791 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4792 return;
4793 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4794 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4795 return;
4796 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4797 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4798 return;
4799 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4800 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4801 return;
4802 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4803 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4804 return;
4805 }
4806 break;
4807 case Intrinsic::aarch64_neon_ld1x3:
4808 if (VT == MVT::v8i8) {
4809 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4810 return;
4811 } else if (VT == MVT::v16i8) {
4812 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4813 return;
4814 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4815 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4816 return;
4817 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4818 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4819 return;
4820 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4821 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4822 return;
4823 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4824 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4825 return;
4826 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4827 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4828 return;
4829 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4830 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4831 return;
4832 }
4833 break;
4834 case Intrinsic::aarch64_neon_ld1x4:
4835 if (VT == MVT::v8i8) {
4836 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4837 return;
4838 } else if (VT == MVT::v16i8) {
4839 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4840 return;
4841 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4842 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4843 return;
4844 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4845 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4846 return;
4847 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4848 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4849 return;
4850 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4851 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4852 return;
4853 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4854 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4855 return;
4856 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4857 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4858 return;
4859 }
4860 break;
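    // For the single-element vectors (v1i64/v1f64) an ld2/ld3/ld4 has
    // nothing to de-interleave, so those cases fall back to the
    // multi-register LD1 forms (LD1Twov1d and friends).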
4861 case Intrinsic::aarch64_neon_ld2:
4862 if (VT == MVT::v8i8) {
4863 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4864 return;
4865 } else if (VT == MVT::v16i8) {
4866 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4867 return;
4868 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4869 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4870 return;
4871 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4872 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4873 return;
4874 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4875 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4876 return;
4877 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4878 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4879 return;
4880 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4881 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4882 return;
4883 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4884 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4885 return;
4886 }
4887 break;
4888 case Intrinsic::aarch64_neon_ld3:
4889 if (VT == MVT::v8i8) {
4890 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4891 return;
4892 } else if (VT == MVT::v16i8) {
4893 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4894 return;
4895 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4896 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4897 return;
4898 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4899 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4900 return;
4901 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4902 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4903 return;
4904 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4905 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4906 return;
4907 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4908 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4909 return;
4910 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4911 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4912 return;
4913 }
4914 break;
4915 case Intrinsic::aarch64_neon_ld4:
4916 if (VT == MVT::v8i8) {
4917 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4918 return;
4919 } else if (VT == MVT::v16i8) {
4920 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4921 return;
4922 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4923 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4924 return;
4925 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4926 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4927 return;
4928 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4929 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4930 return;
4931 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4932 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4933 return;
4934 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4935 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4936 return;
4937 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4938 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4939 return;
4940 }
4941 break;
4942 case Intrinsic::aarch64_neon_ld2r:
4943 if (VT == MVT::v8i8) {
4944 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4945 return;
4946 } else if (VT == MVT::v16i8) {
4947 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4948 return;
4949 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4950 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4951 return;
4952 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4953 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4954 return;
4955 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4956 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4957 return;
4958 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4959 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4960 return;
4961 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4962 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4963 return;
4964 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4965 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4966 return;
4967 }
4968 break;
4969 case Intrinsic::aarch64_neon_ld3r:
4970 if (VT == MVT::v8i8) {
4971 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4972 return;
4973 } else if (VT == MVT::v16i8) {
4974 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4975 return;
4976 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4977 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4978 return;
4979 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4980 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4981 return;
4982 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4983 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4984 return;
4985 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4986 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4987 return;
4988 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4989 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4990 return;
4991 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4992 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4993 return;
4994 }
4995 break;
4996 case Intrinsic::aarch64_neon_ld4r:
4997 if (VT == MVT::v8i8) {
4998 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4999 return;
5000 } else if (VT == MVT::v16i8) {
5001 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5002 return;
5003 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5004 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5005 return;
5006 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5007 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5008 return;
5009 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5010 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5011 return;
5012 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5013 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5014 return;
5015 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5016 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5017 return;
5018 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5019 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5020 return;
5021 }
5022 break;
5023 case Intrinsic::aarch64_neon_ld2lane:
5024 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5025 SelectLoadLane(Node, 2, AArch64::LD2i8);
5026 return;
5027 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5028 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5029 SelectLoadLane(Node, 2, AArch64::LD2i16);
5030 return;
5031 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5032 VT == MVT::v2f32) {
5033 SelectLoadLane(Node, 2, AArch64::LD2i32);
5034 return;
5035 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5036 VT == MVT::v1f64) {
5037 SelectLoadLane(Node, 2, AArch64::LD2i64);
5038 return;
5039 }
5040 break;
5041 case Intrinsic::aarch64_neon_ld3lane:
5042 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5043 SelectLoadLane(Node, 3, AArch64::LD3i8);
5044 return;
5045 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5046 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5047 SelectLoadLane(Node, 3, AArch64::LD3i16);
5048 return;
5049 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5050 VT == MVT::v2f32) {
5051 SelectLoadLane(Node, 3, AArch64::LD3i32);
5052 return;
5053 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5054 VT == MVT::v1f64) {
5055 SelectLoadLane(Node, 3, AArch64::LD3i64);
5056 return;
5057 }
5058 break;
5059 case Intrinsic::aarch64_neon_ld4lane:
5060 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5061 SelectLoadLane(Node, 4, AArch64::LD4i8);
5062 return;
5063 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5064 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5065 SelectLoadLane(Node, 4, AArch64::LD4i16);
5066 return;
5067 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5068 VT == MVT::v2f32) {
5069 SelectLoadLane(Node, 4, AArch64::LD4i32);
5070 return;
5071 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5072 VT == MVT::v1f64) {
5073 SelectLoadLane(Node, 4, AArch64::LD4i64);
5074 return;
5075 }
5076 break;
5077 case Intrinsic::aarch64_ld64b:
5078 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5079 return;
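    // The SVE structured loads below supply both a reg+imm and a reg+reg
    // opcode; SelectPredicatedLoad picks whichever addressing form the
    // operands allow. The scale argument (0 = B up to 4 = Q) matches the
    // element width.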
5080 case Intrinsic::aarch64_sve_ld2q_sret: {
5081 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5082 return;
5083 }
5084 case Intrinsic::aarch64_sve_ld3q_sret: {
5085 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5086 return;
5087 }
5088 case Intrinsic::aarch64_sve_ld4q_sret: {
5089 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5090 return;
5091 }
5092 case Intrinsic::aarch64_sve_ld2_sret: {
5093 if (VT == MVT::nxv16i8) {
5094 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5095 true);
5096 return;
5097 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5098 VT == MVT::nxv8bf16) {
5099 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5100 true);
5101 return;
5102 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5103 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5104 true);
5105 return;
5106 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5107 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5108 true);
5109 return;
5110 }
5111 break;
5112 }
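    // The multi-vector LD1/LDNT1 (predicate-as-counter) loads below need
    // either SME2 (selected as pseudos) or SVE2p1 (selected directly);
    // without one of those features the node falls through to the default
    // handling.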
5113 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5114 if (VT == MVT::nxv16i8) {
5115 if (Subtarget->hasSME2())
5116 SelectContiguousMultiVectorLoad(
5117 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5118 else if (Subtarget->hasSVE2p1())
5119 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5120 AArch64::LD1B_2Z);
5121 else
5122 break;
5123 return;
5124 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5125 VT == MVT::nxv8bf16) {
5126 if (Subtarget->hasSME2())
5127 SelectContiguousMultiVectorLoad(
5128 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5129 else if (Subtarget->hasSVE2p1())
5130 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5131 AArch64::LD1H_2Z);
5132 else
5133 break;
5134 return;
5135 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5136 if (Subtarget->hasSME2())
5137 SelectContiguousMultiVectorLoad(
5138 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5139 else if (Subtarget->hasSVE2p1())
5140 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5141 AArch64::LD1W_2Z);
5142 else
5143 break;
5144 return;
5145 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5146 if (Subtarget->hasSME2())
5147 SelectContiguousMultiVectorLoad(
5148 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5149 else if (Subtarget->hasSVE2p1())
5150 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5151 AArch64::LD1D_2Z);
5152 else
5153 break;
5154 return;
5155 }
5156 break;
5157 }
5158 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5159 if (VT == MVT::nxv16i8) {
5160 if (Subtarget->hasSME2())
5161 SelectContiguousMultiVectorLoad(
5162 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5163 else if (Subtarget->hasSVE2p1())
5164 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5165 AArch64::LD1B_4Z);
5166 else
5167 break;
5168 return;
5169 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5170 VT == MVT::nxv8bf16) {
5171 if (Subtarget->hasSME2())
5172 SelectContiguousMultiVectorLoad(
5173 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5174 else if (Subtarget->hasSVE2p1())
5175 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5176 AArch64::LD1H_4Z);
5177 else
5178 break;
5179 return;
5180 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5181 if (Subtarget->hasSME2())
5182 SelectContiguousMultiVectorLoad(
5183 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5184 else if (Subtarget->hasSVE2p1())
5185 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5186 AArch64::LD1W_4Z);
5187 else
5188 break;
5189 return;
5190 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5191 if (Subtarget->hasSME2())
5192 SelectContiguousMultiVectorLoad(
5193 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5194 else if (Subtarget->hasSVE2p1())
5195 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5196 AArch64::LD1D_4Z);
5197 else
5198 break;
5199 return;
5200 }
5201 break;
5202 }
5203 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5204 if (VT == MVT::nxv16i8) {
5205 if (Subtarget->hasSME2())
5206 SelectContiguousMultiVectorLoad(Node, 2, 0,
5207 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5208 AArch64::LDNT1B_2Z_PSEUDO);
5209 else if (Subtarget->hasSVE2p1())
5210 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5211 AArch64::LDNT1B_2Z);
5212 else
5213 break;
5214 return;
5215 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5216 VT == MVT::nxv8bf16) {
5217 if (Subtarget->hasSME2())
5218 SelectContiguousMultiVectorLoad(Node, 2, 1,
5219 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5220 AArch64::LDNT1H_2Z_PSEUDO);
5221 else if (Subtarget->hasSVE2p1())
5222 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5223 AArch64::LDNT1H_2Z);
5224 else
5225 break;
5226 return;
5227 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5228 if (Subtarget->hasSME2())
5229 SelectContiguousMultiVectorLoad(Node, 2, 2,
5230 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5231 AArch64::LDNT1W_2Z_PSEUDO);
5232 else if (Subtarget->hasSVE2p1())
5233 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5234 AArch64::LDNT1W_2Z);
5235 else
5236 break;
5237 return;
5238 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5239 if (Subtarget->hasSME2())
5240 SelectContiguousMultiVectorLoad(Node, 2, 3,
5241 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5242 AArch64::LDNT1D_2Z_PSEUDO);
5243 else if (Subtarget->hasSVE2p1())
5244 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5245 AArch64::LDNT1D_2Z);
5246 else
5247 break;
5248 return;
5249 }
5250 break;
5251 }
5252 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5253 if (VT == MVT::nxv16i8) {
5254 if (Subtarget->hasSME2())
5255 SelectContiguousMultiVectorLoad(Node, 4, 0,
5256 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5257 AArch64::LDNT1B_4Z_PSEUDO);
5258 else if (Subtarget->hasSVE2p1())
5259 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5260 AArch64::LDNT1B_4Z);
5261 else
5262 break;
5263 return;
5264 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5265 VT == MVT::nxv8bf16) {
5266 if (Subtarget->hasSME2())
5267 SelectContiguousMultiVectorLoad(Node, 4, 1,
5268 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5269 AArch64::LDNT1H_4Z_PSEUDO);
5270 else if (Subtarget->hasSVE2p1())
5271 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5272 AArch64::LDNT1H_4Z);
5273 else
5274 break;
5275 return;
5276 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5277 if (Subtarget->hasSME2())
5278 SelectContiguousMultiVectorLoad(Node, 4, 2,
5279 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5280 AArch64::LDNT1W_4Z_PSEUDO);
5281 else if (Subtarget->hasSVE2p1())
5282 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5283 AArch64::LDNT1W_4Z);
5284 else
5285 break;
5286 return;
5287 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5288 if (Subtarget->hasSME2())
5289 SelectContiguousMultiVectorLoad(Node, 4, 3,
5290 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5291 AArch64::LDNT1D_4Z_PSEUDO);
5292 else if (Subtarget->hasSVE2p1())
5293 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5294 AArch64::LDNT1D_4Z);
5295 else
5296 break;
5297 return;
5298 }
5299 break;
5300 }
5301 case Intrinsic::aarch64_sve_ld3_sret: {
5302 if (VT == MVT::nxv16i8) {
5303 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5304 true);
5305 return;
5306 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5307 VT == MVT::nxv8bf16) {
5308 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5309 true);
5310 return;
5311 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5312 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5313 true);
5314 return;
5315 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5316 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5317 true);
5318 return;
5319 }
5320 break;
5321 }
5322 case Intrinsic::aarch64_sve_ld4_sret: {
5323 if (VT == MVT::nxv16i8) {
5324 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5325 true);
5326 return;
5327 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5328 VT == MVT::nxv8bf16) {
5329 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5330 true);
5331 return;
5332 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5333 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5334 true);
5335 return;
5336 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5337 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5338 true);
5339 return;
5340 }
5341 break;
5342 }
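    // The SME MOVA selections below read groups of 2 or 4 ZA tile slices
    // into Z registers; the <MaxIdx, Scale> template arguments bound the
    // slice-offset immediate (largest legal value and the multiple it must
    // be) for each element size.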
5343 case Intrinsic::aarch64_sme_read_hor_vg2: {
5344 if (VT == MVT::nxv16i8) {
5345 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5346 AArch64::MOVA_2ZMXI_H_B);
5347 return;
5348 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5349 VT == MVT::nxv8bf16) {
5350 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5351 AArch64::MOVA_2ZMXI_H_H);
5352 return;
5353 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5354 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5355 AArch64::MOVA_2ZMXI_H_S);
5356 return;
5357 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5358 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5359 AArch64::MOVA_2ZMXI_H_D);
5360 return;
5361 }
5362 break;
5363 }
5364 case Intrinsic::aarch64_sme_read_ver_vg2: {
5365 if (VT == MVT::nxv16i8) {
5366 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5367 AArch64::MOVA_2ZMXI_V_B);
5368 return;
5369 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5370 VT == MVT::nxv8bf16) {
5371 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5372 AArch64::MOVA_2ZMXI_V_H);
5373 return;
5374 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5375 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5376 AArch64::MOVA_2ZMXI_V_S);
5377 return;
5378 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5379 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5380 AArch64::MOVA_2ZMXI_V_D);
5381 return;
5382 }
5383 break;
5384 }
5385 case Intrinsic::aarch64_sme_read_hor_vg4: {
5386 if (VT == MVT::nxv16i8) {
5387 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5388 AArch64::MOVA_4ZMXI_H_B);
5389 return;
5390 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5391 VT == MVT::nxv8bf16) {
5392 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5393 AArch64::MOVA_4ZMXI_H_H);
5394 return;
5395 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5396 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5397 AArch64::MOVA_4ZMXI_H_S);
5398 return;
5399 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5400 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5401 AArch64::MOVA_4ZMXI_H_D);
5402 return;
5403 }
5404 break;
5405 }
5406 case Intrinsic::aarch64_sme_read_ver_vg4: {
5407 if (VT == MVT::nxv16i8) {
5408 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5409 AArch64::MOVA_4ZMXI_V_B);
5410 return;
5411 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5412 VT == MVT::nxv8bf16) {
5413 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5414 AArch64::MOVA_4ZMXI_V_H);
5415 return;
5416 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5417 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5418 AArch64::MOVA_4ZMXI_V_S);
5419 return;
5420 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5421 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5422 AArch64::MOVA_4ZMXI_V_D);
5423 return;
5424 }
5425 break;
5426 }
5427 case Intrinsic::aarch64_sme_read_vg1x2: {
5428 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5429 AArch64::MOVA_VG2_2ZMXI);
5430 return;
5431 }
5432 case Intrinsic::aarch64_sme_read_vg1x4: {
5433 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5434 AArch64::MOVA_VG4_4ZMXI);
5435 return;
5436 }
5437 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5438 if (VT == MVT::nxv16i8) {
5439 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5440 return;
5441 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5442 VT == MVT::nxv8bf16) {
5443 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5444 return;
5445 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5446 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5447 return;
5448 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5449 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5450 return;
5451 }
5452 break;
5453 }
5454 case Intrinsic::aarch64_sme_readz_vert_x2: {
5455 if (VT == MVT::nxv16i8) {
5456 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5457 return;
5458 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5459 VT == MVT::nxv8bf16) {
5460 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5461 return;
5462 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5463 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5464 return;
5465 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5466 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5467 return;
5468 }
5469 break;
5470 }
5471 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5472 if (VT == MVT::nxv16i8) {
5473 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5474 return;
5475 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5476 VT == MVT::nxv8bf16) {
5477 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5478 return;
5479 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5480 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5481 return;
5482 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5483 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5484 return;
5485 }
5486 break;
5487 }
5488 case Intrinsic::aarch64_sme_readz_vert_x4: {
5489 if (VT == MVT::nxv16i8) {
5490 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5491 return;
5492 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5493 VT == MVT::nxv8bf16) {
5494 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5495 return;
5496 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5497 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5498 return;
5499 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5500 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5501 return;
5502 }
5503 break;
5504 }
5505 case Intrinsic::aarch64_sme_readz_x2: {
5506 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5507 AArch64::ZA);
5508 return;
5509 }
5510 case Intrinsic::aarch64_sme_readz_x4: {
5511 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5512 AArch64::ZA);
5513 return;
5514 }
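    // The Swift async context lives in the slot just below the frame
    // pointer, so its address is materialized as FP - 8 and the function
    // is marked as carrying a Swift async context.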
5515 case Intrinsic::swift_async_context_addr: {
5516 SDLoc DL(Node);
5517 SDValue Chain = Node->getOperand(0);
5518 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5519 SDValue Res = SDValue(
5520 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5521 CurDAG->getTargetConstant(8, DL, MVT::i32),
5522 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5523 0);
5524 ReplaceUses(SDValue(Node, 0), Res);
5525 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5526 CurDAG->RemoveDeadNode(Node);
5527
5528 auto &MF = CurDAG->getMachineFunction();
5529 MF.getFrameInfo().setFrameAddressIsTaken(true);
5530 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5531 return;
5532 }
5533 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5534 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5535 Node->getValueType(0),
5536 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5537 AArch64::LUTI2_4ZTZI_S}))
5538 // Second Immediate must be <= 3:
5539 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5540 return;
5541 }
5542 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5543 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5544 Node->getValueType(0),
5545 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5546 // Second Immediate must be <= 1:
5547 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5548 return;
5549 }
5550 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5551 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5552 Node->getValueType(0),
5553 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5554 AArch64::LUTI2_2ZTZI_S}))
5555 // Second Immediate must be <= 7:
5556 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5557 return;
5558 }
5559 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5560 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5561 Node->getValueType(0),
5562 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5563 AArch64::LUTI4_2ZTZI_S}))
5564 // Second Immediate must be <= 3:
5565 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5566 return;
5567 }
5568 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5569 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5570 return;
5571 }
5572 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5573 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5574 Node->getValueType(0),
5575 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5576 SelectCVTIntrinsicFP8(Node, 2, Opc);
5577 return;
5578 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5579 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5580 Node->getValueType(0),
5581 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5582 SelectCVTIntrinsicFP8(Node, 2, Opc);
5583 return;
5584 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5585 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5586 Node->getValueType(0),
5587 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5588 SelectCVTIntrinsicFP8(Node, 2, Opc);
5589 return;
5590 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5591 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5592 Node->getValueType(0),
5593 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5594 SelectCVTIntrinsicFP8(Node, 2, Opc);
5595 return;
5596 }
5597 } break;
5598 case ISD::INTRINSIC_WO_CHAIN: {
5599 unsigned IntNo = Node->getConstantOperandVal(0);
5600 switch (IntNo) {
5601 default:
5602 break;
5603 case Intrinsic::aarch64_tagp:
5604 SelectTagP(Node);
5605 return;
5606
5607 case Intrinsic::ptrauth_auth:
5608 SelectPtrauthAuth(Node);
5609 return;
5610
5611 case Intrinsic::ptrauth_resign:
5612 SelectPtrauthResign(Node);
5613 return;
5614
5615 case Intrinsic::aarch64_neon_tbl2:
5616 SelectTable(Node, 2,
5617 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5618 false);
5619 return;
5620 case Intrinsic::aarch64_neon_tbl3:
5621 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5622 : AArch64::TBLv16i8Three,
5623 false);
5624 return;
5625 case Intrinsic::aarch64_neon_tbl4:
5626 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5627 : AArch64::TBLv16i8Four,
5628 false);
5629 return;
5630 case Intrinsic::aarch64_neon_tbx2:
5631 SelectTable(Node, 2,
5632 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5633 true);
5634 return;
5635 case Intrinsic::aarch64_neon_tbx3:
5636 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5637 : AArch64::TBXv16i8Three,
5638 true);
5639 return;
5640 case Intrinsic::aarch64_neon_tbx4:
5641 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5642 : AArch64::TBXv16i8Four,
5643 true);
5644 return;
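    // The opcode tables passed to SelectOpcodeFromVT below are indexed by
    // element type ({B, H, S, D} for integer forms, {BF16, F16, F32, F64}
    // for FP forms); a zero entry means no instruction exists for that
    // element type, in which case nothing is selected.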
5645 case Intrinsic::aarch64_sve_srshl_single_x2:
5646 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5647 Node->getValueType(0),
5648 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5649 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5650 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5651 return;
5652 case Intrinsic::aarch64_sve_srshl_single_x4:
5653 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5654 Node->getValueType(0),
5655 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5656 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5657 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5658 return;
5659 case Intrinsic::aarch64_sve_urshl_single_x2:
5660 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5661 Node->getValueType(0),
5662 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5663 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5664 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5665 return;
5666 case Intrinsic::aarch64_sve_urshl_single_x4:
5667 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5668 Node->getValueType(0),
5669 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5670 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5671 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5672 return;
5673 case Intrinsic::aarch64_sve_srshl_x2:
5674 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5675 Node->getValueType(0),
5676 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5677 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5678 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5679 return;
5680 case Intrinsic::aarch64_sve_srshl_x4:
5681 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5682 Node->getValueType(0),
5683 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5684 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5685 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5686 return;
5687 case Intrinsic::aarch64_sve_urshl_x2:
5688 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5689 Node->getValueType(0),
5690 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5691 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5692 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5693 return;
5694 case Intrinsic::aarch64_sve_urshl_x4:
5695 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5696 Node->getValueType(0),
5697 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5698 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5699 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5700 return;
5701 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5702 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5703 Node->getValueType(0),
5704 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5705 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5706 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5707 return;
5708 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5709 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5710 Node->getValueType(0),
5711 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5712 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5713 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5714 return;
5715 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5716 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5717 Node->getValueType(0),
5718 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5719 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5720 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5721 return;
5722 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5723 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5724 Node->getValueType(0),
5725 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5726 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5727 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5728 return;
5729 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5730 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5731 Node->getValueType(0),
5732 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5733 AArch64::FSCALE_2ZZ_D}))
5734 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5735 return;
5736 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5737 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5738 Node->getValueType(0),
5739 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5740 AArch64::FSCALE_4ZZ_D}))
5741 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5742 return;
5743 case Intrinsic::aarch64_sme_fp8_scale_x2:
5744 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5745 Node->getValueType(0),
5746 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5747 AArch64::FSCALE_2Z2Z_D}))
5748 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5749 return;
5750 case Intrinsic::aarch64_sme_fp8_scale_x4:
5751 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5752 Node->getValueType(0),
5753 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5754 AArch64::FSCALE_4Z4Z_D}))
5755 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5756 return;
5757 case Intrinsic::aarch64_sve_whilege_x2:
5758 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5759 Node->getValueType(0),
5760 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5761 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5762 SelectWhilePair(Node, Op);
5763 return;
5764 case Intrinsic::aarch64_sve_whilegt_x2:
5765 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5766 Node->getValueType(0),
5767 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5768 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5769 SelectWhilePair(Node, Op);
5770 return;
5771 case Intrinsic::aarch64_sve_whilehi_x2:
5772 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5773 Node->getValueType(0),
5774 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5775 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5776 SelectWhilePair(Node, Op);
5777 return;
5778 case Intrinsic::aarch64_sve_whilehs_x2:
5779 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5780 Node->getValueType(0),
5781 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5782 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5783 SelectWhilePair(Node, Op);
5784 return;
5785 case Intrinsic::aarch64_sve_whilele_x2:
5786 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5787 Node->getValueType(0),
5788 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5789 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5790 SelectWhilePair(Node, Op);
5791 return;
5792 case Intrinsic::aarch64_sve_whilelo_x2:
5793 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5794 Node->getValueType(0),
5795 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5796 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5797 SelectWhilePair(Node, Op);
5798 return;
5799 case Intrinsic::aarch64_sve_whilels_x2:
5800 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5801 Node->getValueType(0),
5802 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5803 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5804 SelectWhilePair(Node, Op);
5805 return;
5806 case Intrinsic::aarch64_sve_whilelt_x2:
5807 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5808 Node->getValueType(0),
5809 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5810 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5811 SelectWhilePair(Node, Op);
5812 return;
5813 case Intrinsic::aarch64_sve_smax_single_x2:
5814 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5815 Node->getValueType(0),
5816 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5817 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5818 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5819 return;
5820 case Intrinsic::aarch64_sve_umax_single_x2:
5821 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5822 Node->getValueType(0),
5823 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5824 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5825 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5826 return;
5827 case Intrinsic::aarch64_sve_fmax_single_x2:
5828 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5829 Node->getValueType(0),
5830 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5831 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5832 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5833 return;
5834 case Intrinsic::aarch64_sve_smax_single_x4:
5835 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5836 Node->getValueType(0),
5837 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5838 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5839 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5840 return;
5841 case Intrinsic::aarch64_sve_umax_single_x4:
5842 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5843 Node->getValueType(0),
5844 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5845 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5846 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5847 return;
5848 case Intrinsic::aarch64_sve_fmax_single_x4:
5849 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5850 Node->getValueType(0),
5851 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5852 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5853 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5854 return;
5855 case Intrinsic::aarch64_sve_smin_single_x2:
5856 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5857 Node->getValueType(0),
5858 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5859 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5860 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5861 return;
5862 case Intrinsic::aarch64_sve_umin_single_x2:
5863 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5864 Node->getValueType(0),
5865 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5866 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5867 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5868 return;
5869 case Intrinsic::aarch64_sve_fmin_single_x2:
5870 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5871 Node->getValueType(0),
5872 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5873 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5874 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5875 return;
5876 case Intrinsic::aarch64_sve_smin_single_x4:
5877 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5878 Node->getValueType(0),
5879 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5880 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5881 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5882 return;
5883 case Intrinsic::aarch64_sve_umin_single_x4:
5884 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5885 Node->getValueType(0),
5886 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5887 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5888 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5889 return;
5890 case Intrinsic::aarch64_sve_fmin_single_x4:
5891 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5892 Node->getValueType(0),
5893 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5894 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5895 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5896 return;
5897 case Intrinsic::aarch64_sve_smax_x2:
5898 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5899 Node->getValueType(0),
5900 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5901 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5902 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5903 return;
5904 case Intrinsic::aarch64_sve_umax_x2:
5905 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5906 Node->getValueType(0),
5907 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5908 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5909 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5910 return;
5911 case Intrinsic::aarch64_sve_fmax_x2:
5912 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5913 Node->getValueType(0),
5914 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5915 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5916 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5917 return;
5918 case Intrinsic::aarch64_sve_smax_x4:
5919 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5920 Node->getValueType(0),
5921 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5922 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5923 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5924 return;
5925 case Intrinsic::aarch64_sve_umax_x4:
5926 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5927 Node->getValueType(0),
5928 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5929 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5930 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5931 return;
5932 case Intrinsic::aarch64_sve_fmax_x4:
5933 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5934 Node->getValueType(0),
5935 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5936 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5937 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5938 return;
5939 case Intrinsic::aarch64_sme_famax_x2:
5940 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5941 Node->getValueType(0),
5942 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
5943 AArch64::FAMAX_2Z2Z_D}))
5944 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5945 return;
5946 case Intrinsic::aarch64_sme_famax_x4:
5947 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5948 Node->getValueType(0),
5949 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
5950 AArch64::FAMAX_4Z4Z_D}))
5951 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5952 return;
5953 case Intrinsic::aarch64_sme_famin_x2:
5954 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5955 Node->getValueType(0),
5956 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
5957 AArch64::FAMIN_2Z2Z_D}))
5958 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5959 return;
5960 case Intrinsic::aarch64_sme_famin_x4:
5961 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5962 Node->getValueType(0),
5963 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
5964 AArch64::FAMIN_4Z4Z_D}))
5965 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5966 return;
5967 case Intrinsic::aarch64_sve_smin_x2:
5968 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5969 Node->getValueType(0),
5970 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5971 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5972 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5973 return;
5974 case Intrinsic::aarch64_sve_umin_x2:
5975 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5976 Node->getValueType(0),
5977 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5978 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5979 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5980 return;
5981 case Intrinsic::aarch64_sve_fmin_x2:
5982 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5983 Node->getValueType(0),
5984 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5985 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5986 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5987 return;
5988 case Intrinsic::aarch64_sve_smin_x4:
5989 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5990 Node->getValueType(0),
5991 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5992 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5993 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5994 return;
5995 case Intrinsic::aarch64_sve_umin_x4:
5996 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5997 Node->getValueType(0),
5998 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5999 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6000 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6001 return;
6002 case Intrinsic::aarch64_sve_fmin_x4:
6003 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6004 Node->getValueType(0),
6005 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6006 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6007 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6008 return;
6009 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
6010 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6011 Node->getValueType(0),
6012 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6013 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6014 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6015 return;
6016 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
6017 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6018 Node->getValueType(0),
6019 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6020 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6021 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6022 return;
6023 case Intrinsic::aarch64_sve_fminnm_single_x2:
6024 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6025 Node->getValueType(0),
6026 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6027 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6028 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6029 return;
6030 case Intrinsic::aarch64_sve_fminnm_single_x4:
6031 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6032 Node->getValueType(0),
6033 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6034 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6035 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6036 return;
6037 case Intrinsic::aarch64_sve_fmaxnm_x2:
6038 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6039 Node->getValueType(0),
6040 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6041 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6042 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6043 return;
6044 case Intrinsic::aarch64_sve_fmaxnm_x4:
6045 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6046 Node->getValueType(0),
6047 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6048 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6049 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6050 return;
6051 case Intrinsic::aarch64_sve_fminnm_x2:
6052 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6053 Node->getValueType(0),
6054 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6055 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6056 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6057 return;
6058 case Intrinsic::aarch64_sve_fminnm_x4:
6059 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6060 Node->getValueType(0),
6061 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6062 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6063 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6064 return;
6065 case Intrinsic::aarch64_sve_fcvtzs_x2:
6066 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6067 return;
6068 case Intrinsic::aarch64_sve_scvtf_x2:
6069 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6070 return;
6071 case Intrinsic::aarch64_sve_fcvtzu_x2:
6072 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6073 return;
6074 case Intrinsic::aarch64_sve_ucvtf_x2:
6075 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6076 return;
6077 case Intrinsic::aarch64_sve_fcvtzs_x4:
6078 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6079 return;
6080 case Intrinsic::aarch64_sve_scvtf_x4:
6081 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6082 return;
6083 case Intrinsic::aarch64_sve_fcvtzu_x4:
6084 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6085 return;
6086 case Intrinsic::aarch64_sve_ucvtf_x4:
6087 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6088 return;
6089 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6090 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6091 return;
6092 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6093 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6094 return;
6095 case Intrinsic::aarch64_sve_sclamp_single_x2:
6096 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6097 Node->getValueType(0),
6098 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6099 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6100 SelectClamp(Node, 2, Op);
6101 return;
6102 case Intrinsic::aarch64_sve_uclamp_single_x2:
6103 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6104 Node->getValueType(0),
6105 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6106 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6107 SelectClamp(Node, 2, Op);
6108 return;
6109 case Intrinsic::aarch64_sve_fclamp_single_x2:
6110 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6111 Node->getValueType(0),
6112 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6113 AArch64::FCLAMP_VG2_2Z2Z_D}))
6114 SelectClamp(Node, 2, Op);
6115 return;
6116 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6117 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6118 return;
6119 case Intrinsic::aarch64_sve_sclamp_single_x4:
6120 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6121 Node->getValueType(0),
6122 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6123 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6124 SelectClamp(Node, 4, Op);
6125 return;
6126 case Intrinsic::aarch64_sve_uclamp_single_x4:
6127 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6128 Node->getValueType(0),
6129 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6130 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6131 SelectClamp(Node, 4, Op);
6132 return;
6133 case Intrinsic::aarch64_sve_fclamp_single_x4:
6134 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6135 Node->getValueType(0),
6136 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6137 AArch64::FCLAMP_VG4_4Z4Z_D}))
6138 SelectClamp(Node, 4, Op);
6139 return;
6140 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6141 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6142 return;
6143 case Intrinsic::aarch64_sve_add_single_x2:
6144 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6145 Node->getValueType(0),
6146 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6147 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6148 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6149 return;
6150 case Intrinsic::aarch64_sve_add_single_x4:
6151 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6152 Node->getValueType(0),
6153 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6154 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6155 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6156 return;
6157 case Intrinsic::aarch64_sve_zip_x2:
6158 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6159 Node->getValueType(0),
6160 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6161 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6162 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6163 return;
6164 case Intrinsic::aarch64_sve_zipq_x2:
6165 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6166 AArch64::ZIP_VG2_2ZZZ_Q);
6167 return;
6168 case Intrinsic::aarch64_sve_zip_x4:
6169 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6170 Node->getValueType(0),
6171 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6172 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6173 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6174 return;
6175 case Intrinsic::aarch64_sve_zipq_x4:
6176 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6177 AArch64::ZIP_VG4_4Z4Z_Q);
6178 return;
6179 case Intrinsic::aarch64_sve_uzp_x2:
6180 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6181 Node->getValueType(0),
6182 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6183 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6184 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6185 return;
6186 case Intrinsic::aarch64_sve_uzpq_x2:
6187 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6188 AArch64::UZP_VG2_2ZZZ_Q);
6189 return;
6190 case Intrinsic::aarch64_sve_uzp_x4:
6191 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6192 Node->getValueType(0),
6193 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6194 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6195 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6196 return;
6197 case Intrinsic::aarch64_sve_uzpq_x4:
6198 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6199 AArch64::UZP_VG4_4Z4Z_Q);
6200 return;
6201 case Intrinsic::aarch64_sve_sel_x2:
6202 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6203 Node->getValueType(0),
6204 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6205 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6206 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6207 return;
6208 case Intrinsic::aarch64_sve_sel_x4:
6209 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6210 Node->getValueType(0),
6211 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6212 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6213 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6214 return;
6215 case Intrinsic::aarch64_sve_frinta_x2:
6216 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6217 return;
6218 case Intrinsic::aarch64_sve_frinta_x4:
6219 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6220 return;
6221 case Intrinsic::aarch64_sve_frintm_x2:
6222 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6223 return;
6224 case Intrinsic::aarch64_sve_frintm_x4:
6225 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6226 return;
6227 case Intrinsic::aarch64_sve_frintn_x2:
6228 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6229 return;
6230 case Intrinsic::aarch64_sve_frintn_x4:
6231 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6232 return;
6233 case Intrinsic::aarch64_sve_frintp_x2:
6234 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6235 return;
6236 case Intrinsic::aarch64_sve_frintp_x4:
6237 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6238 return;
6239 case Intrinsic::aarch64_sve_sunpk_x2:
6240 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6241 Node->getValueType(0),
6242 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6243 AArch64::SUNPK_VG2_2ZZ_D}))
6244 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6245 return;
6246 case Intrinsic::aarch64_sve_uunpk_x2:
6247 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6248 Node->getValueType(0),
6249 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6250 AArch64::UUNPK_VG2_2ZZ_D}))
6251 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6252 return;
6253 case Intrinsic::aarch64_sve_sunpk_x4:
6254 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6255 Node->getValueType(0),
6256 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6257 AArch64::SUNPK_VG4_4Z2Z_D}))
6258 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6259 return;
6260 case Intrinsic::aarch64_sve_uunpk_x4:
6261 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6262 Node->getValueType(0),
6263 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6264 AArch64::UUNPK_VG4_4Z2Z_D}))
6265 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6266 return;
6267 case Intrinsic::aarch64_sve_pext_x2: {
6268 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6269 Node->getValueType(0),
6270 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6271 AArch64::PEXT_2PCI_D}))
6272 SelectPExtPair(Node, Op);
6273 return;
6274 }
6275 }
6276 break;
6277 }
6278 case ISD::INTRINSIC_VOID: {
6279 unsigned IntNo = Node->getConstantOperandVal(1);
6280 if (Node->getNumOperands() >= 3)
6281 VT = Node->getOperand(2)->getValueType(0);
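      // For these void intrinsics the operands are (chain, intrinsic id,
      // data...), so the value type of the first data operand picks the
      // store variant.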
6282 switch (IntNo) {
6283 default:
6284 break;
6285 case Intrinsic::aarch64_neon_st1x2: {
6286 if (VT == MVT::v8i8) {
6287 SelectStore(Node, 2, AArch64::ST1Twov8b);
6288 return;
6289 } else if (VT == MVT::v16i8) {
6290 SelectStore(Node, 2, AArch64::ST1Twov16b);
6291 return;
6292 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6293 VT == MVT::v4bf16) {
6294 SelectStore(Node, 2, AArch64::ST1Twov4h);
6295 return;
6296 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6297 VT == MVT::v8bf16) {
6298 SelectStore(Node, 2, AArch64::ST1Twov8h);
6299 return;
6300 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6301 SelectStore(Node, 2, AArch64::ST1Twov2s);
6302 return;
6303 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6304 SelectStore(Node, 2, AArch64::ST1Twov4s);
6305 return;
6306 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6307 SelectStore(Node, 2, AArch64::ST1Twov2d);
6308 return;
6309 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6310 SelectStore(Node, 2, AArch64::ST1Twov1d);
6311 return;
6312 }
6313 break;
6314 }
6315 case Intrinsic::aarch64_neon_st1x3: {
6316 if (VT == MVT::v8i8) {
6317 SelectStore(Node, 3, AArch64::ST1Threev8b);
6318 return;
6319 } else if (VT == MVT::v16i8) {
6320 SelectStore(Node, 3, AArch64::ST1Threev16b);
6321 return;
6322 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6323 VT == MVT::v4bf16) {
6324 SelectStore(Node, 3, AArch64::ST1Threev4h);
6325 return;
6326 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6327 VT == MVT::v8bf16) {
6328 SelectStore(Node, 3, AArch64::ST1Threev8h);
6329 return;
6330 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6331 SelectStore(Node, 3, AArch64::ST1Threev2s);
6332 return;
6333 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6334 SelectStore(Node, 3, AArch64::ST1Threev4s);
6335 return;
6336 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6337 SelectStore(Node, 3, AArch64::ST1Threev2d);
6338 return;
6339 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6340 SelectStore(Node, 3, AArch64::ST1Threev1d);
6341 return;
6342 }
6343 break;
6344 }
6345 case Intrinsic::aarch64_neon_st1x4: {
6346 if (VT == MVT::v8i8) {
6347 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6348 return;
6349 } else if (VT == MVT::v16i8) {
6350 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6351 return;
6352 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6353 VT == MVT::v4bf16) {
6354 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6355 return;
6356 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6357 VT == MVT::v8bf16) {
6358 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6359 return;
6360 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6361 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6362 return;
6363 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6364 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6365 return;
6366 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6367 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6368 return;
6369 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6370 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6371 return;
6372 }
6373 break;
6374 }
6375 case Intrinsic::aarch64_neon_st2: {
6376 if (VT == MVT::v8i8) {
6377 SelectStore(Node, 2, AArch64::ST2Twov8b);
6378 return;
6379 } else if (VT == MVT::v16i8) {
6380 SelectStore(Node, 2, AArch64::ST2Twov16b);
6381 return;
6382 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6383 VT == MVT::v4bf16) {
6384 SelectStore(Node, 2, AArch64::ST2Twov4h);
6385 return;
6386 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6387 VT == MVT::v8bf16) {
6388 SelectStore(Node, 2, AArch64::ST2Twov8h);
6389 return;
6390 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6391 SelectStore(Node, 2, AArch64::ST2Twov2s);
6392 return;
6393 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6394 SelectStore(Node, 2, AArch64::ST2Twov4s);
6395 return;
6396 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6397 SelectStore(Node, 2, AArch64::ST2Twov2d);
6398 return;
6399 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6400 SelectStore(Node, 2, AArch64::ST1Twov1d);
6401 return;
6402 }
6403 break;
6404 }
6405 case Intrinsic::aarch64_neon_st3: {
6406 if (VT == MVT::v8i8) {
6407 SelectStore(Node, 3, AArch64::ST3Threev8b);
6408 return;
6409 } else if (VT == MVT::v16i8) {
6410 SelectStore(Node, 3, AArch64::ST3Threev16b);
6411 return;
6412 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6413 VT == MVT::v4bf16) {
6414 SelectStore(Node, 3, AArch64::ST3Threev4h);
6415 return;
6416 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6417 VT == MVT::v8bf16) {
6418 SelectStore(Node, 3, AArch64::ST3Threev8h);
6419 return;
6420 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6421 SelectStore(Node, 3, AArch64::ST3Threev2s);
6422 return;
6423 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6424 SelectStore(Node, 3, AArch64::ST3Threev4s);
6425 return;
6426 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6427 SelectStore(Node, 3, AArch64::ST3Threev2d);
6428 return;
6429 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6430 SelectStore(Node, 3, AArch64::ST1Threev1d);
6431 return;
6432 }
6433 break;
6434 }
6435 case Intrinsic::aarch64_neon_st4: {
6436 if (VT == MVT::v8i8) {
6437 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6438 return;
6439 } else if (VT == MVT::v16i8) {
6440 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6441 return;
6442 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6443 VT == MVT::v4bf16) {
6444 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6445 return;
6446 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6447 VT == MVT::v8bf16) {
6448 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6449 return;
6450 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6451 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6452 return;
6453 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6454 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6455 return;
6456 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6457 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6458 return;
6459 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6460 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6461 return;
6462 }
6463 break;
6464 }
6465 case Intrinsic::aarch64_neon_st2lane: {
6466 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6467 SelectStoreLane(Node, 2, AArch64::ST2i8);
6468 return;
6469 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6470 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6471 SelectStoreLane(Node, 2, AArch64::ST2i16);
6472 return;
6473 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6474 VT == MVT::v2f32) {
6475 SelectStoreLane(Node, 2, AArch64::ST2i32);
6476 return;
6477 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6478 VT == MVT::v1f64) {
6479 SelectStoreLane(Node, 2, AArch64::ST2i64);
6480 return;
6481 }
6482 break;
6483 }
6484 case Intrinsic::aarch64_neon_st3lane: {
6485 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6486 SelectStoreLane(Node, 3, AArch64::ST3i8);
6487 return;
6488 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6489 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6490 SelectStoreLane(Node, 3, AArch64::ST3i16);
6491 return;
6492 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6493 VT == MVT::v2f32) {
6494 SelectStoreLane(Node, 3, AArch64::ST3i32);
6495 return;
6496 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6497 VT == MVT::v1f64) {
6498 SelectStoreLane(Node, 3, AArch64::ST3i64);
6499 return;
6500 }
6501 break;
6502 }
6503 case Intrinsic::aarch64_neon_st4lane: {
6504 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6505 SelectStoreLane(Node, 4, AArch64::ST4i8);
6506 return;
6507 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6508 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6509 SelectStoreLane(Node, 4, AArch64::ST4i16);
6510 return;
6511 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6512 VT == MVT::v2f32) {
6513 SelectStoreLane(Node, 4, AArch64::ST4i32);
6514 return;
6515 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6516 VT == MVT::v1f64) {
6517 SelectStoreLane(Node, 4, AArch64::ST4i64);
6518 return;
6519 }
6520 break;
6521 }
6522 case Intrinsic::aarch64_sve_st2q: {
6523 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6524 return;
6525 }
6526 case Intrinsic::aarch64_sve_st3q: {
6527 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6528 return;
6529 }
6530 case Intrinsic::aarch64_sve_st4q: {
6531 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6532 return;
6533 }
6534 case Intrinsic::aarch64_sve_st2: {
6535 if (VT == MVT::nxv16i8) {
6536 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6537 return;
6538 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6539 VT == MVT::nxv8bf16) {
6540 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6541 return;
6542 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6543 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6544 return;
6545 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6546 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6547 return;
6548 }
6549 break;
6550 }
6551 case Intrinsic::aarch64_sve_st3: {
6552 if (VT == MVT::nxv16i8) {
6553 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6554 return;
6555 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6556 VT == MVT::nxv8bf16) {
6557 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6558 return;
6559 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6560 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6561 return;
6562 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6563 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6564 return;
6565 }
6566 break;
6567 }
6568 case Intrinsic::aarch64_sve_st4: {
6569 if (VT == MVT::nxv16i8) {
6570 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6571 return;
6572 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6573 VT == MVT::nxv8bf16) {
6574 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6575 return;
6576 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6577 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6578 return;
6579 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6580 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6581 return;
6582 }
6583 break;
6584 }
6585 }
6586 break;
6587 }
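// Note (editorial addition, not in the source): the AArch64ISD::*post cases
// below all follow the same pattern: the post-indexed NEON load/store variant
// is chosen purely from the element type, and for the loads the dsub0/qsub0
// argument tells SelectPostLoad whether the result tuple is built from 64-bit
// D registers or 128-bit Q registers.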
6588 case AArch64ISD::LD2post: {
6589 if (VT == MVT::v8i8) {
6590 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6591 return;
6592 } else if (VT == MVT::v16i8) {
6593 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6594 return;
6595 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6596 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6597 return;
6598 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6599 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6600 return;
6601 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6602 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6603 return;
6604 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6605 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6606 return;
6607 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6608 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6609 return;
6610 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6611 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6612 return;
6613 }
6614 break;
6615 }
6616 case AArch64ISD::LD3post: {
6617 if (VT == MVT::v8i8) {
6618 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6619 return;
6620 } else if (VT == MVT::v16i8) {
6621 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6622 return;
6623 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6624 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6625 return;
6626 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6627 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6628 return;
6629 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6630 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6631 return;
6632 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6633 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6634 return;
6635 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6636 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6637 return;
6638 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6639 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6640 return;
6641 }
6642 break;
6643 }
6644 case AArch64ISD::LD4post: {
6645 if (VT == MVT::v8i8) {
6646 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6647 return;
6648 } else if (VT == MVT::v16i8) {
6649 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6650 return;
6651 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6652 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6653 return;
6654 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6655 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6656 return;
6657 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6658 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6659 return;
6660 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6661 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6662 return;
6663 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6664 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6665 return;
6666 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6667 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6668 return;
6669 }
6670 break;
6671 }
6672 case AArch64ISD::LD1x2post: {
6673 if (VT == MVT::v8i8) {
6674 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6675 return;
6676 } else if (VT == MVT::v16i8) {
6677 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6678 return;
6679 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6680 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6681 return;
6682 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6683 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6684 return;
6685 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6686 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6687 return;
6688 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6689 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6690 return;
6691 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6692 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6693 return;
6694 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6695 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6696 return;
6697 }
6698 break;
6699 }
6700 case AArch64ISD::LD1x3post: {
6701 if (VT == MVT::v8i8) {
6702 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6703 return;
6704 } else if (VT == MVT::v16i8) {
6705 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6706 return;
6707 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6708 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6709 return;
6710 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6711 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6712 return;
6713 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6714 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6715 return;
6716 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6717 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6718 return;
6719 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6720 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6721 return;
6722 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6723 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6724 return;
6725 }
6726 break;
6727 }
6728 case AArch64ISD::LD1x4post: {
6729 if (VT == MVT::v8i8) {
6730 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6731 return;
6732 } else if (VT == MVT::v16i8) {
6733 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6734 return;
6735 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6736 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6737 return;
6738 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6739 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6740 return;
6741 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6742 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6743 return;
6744 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6745 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6746 return;
6747 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6748 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6749 return;
6750 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6751 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6752 return;
6753 }
6754 break;
6755 }
6756 case AArch64ISD::LD1DUPpost: {
6757 if (VT == MVT::v8i8) {
6758 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6759 return;
6760 } else if (VT == MVT::v16i8) {
6761 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6762 return;
6763 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6764 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6765 return;
6766 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6767 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6768 return;
6769 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6770 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6771 return;
6772 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6773 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6774 return;
6775 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6776 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6777 return;
6778 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6779 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6780 return;
6781 }
6782 break;
6783 }
6784 case AArch64ISD::LD2DUPpost: {
6785 if (VT == MVT::v8i8) {
6786 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6787 return;
6788 } else if (VT == MVT::v16i8) {
6789 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6790 return;
6791 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6792 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6793 return;
6794 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6795 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6796 return;
6797 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6798 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6799 return;
6800 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6801 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6802 return;
6803 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6804 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6805 return;
6806 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6807 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6808 return;
6809 }
6810 break;
6811 }
6812 case AArch64ISD::LD3DUPpost: {
6813 if (VT == MVT::v8i8) {
6814 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6815 return;
6816 } else if (VT == MVT::v16i8) {
6817 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6818 return;
6819 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6820 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6821 return;
6822 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6823 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6824 return;
6825 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6826 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6827 return;
6828 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6829 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6830 return;
6831 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6832 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6833 return;
6834 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6835 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6836 return;
6837 }
6838 break;
6839 }
6840 case AArch64ISD::LD4DUPpost: {
6841 if (VT == MVT::v8i8) {
6842 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6843 return;
6844 } else if (VT == MVT::v16i8) {
6845 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6846 return;
6847 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6848 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6849 return;
6850 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6851 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6852 return;
6853 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6854 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6855 return;
6856 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6857 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6858 return;
6859 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6860 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6861 return;
6862 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6863 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6864 return;
6865 }
6866 break;
6867 }
6868 case AArch64ISD::LD1LANEpost: {
6869 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6870 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6871 return;
6872 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6873 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6874 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6875 return;
6876 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6877 VT == MVT::v2f32) {
6878 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6879 return;
6880 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6881 VT == MVT::v1f64) {
6882 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6883 return;
6884 }
6885 break;
6886 }
6887 case AArch64ISD::LD2LANEpost: {
6888 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6889 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6890 return;
6891 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6892 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6893 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6894 return;
6895 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6896 VT == MVT::v2f32) {
6897 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6898 return;
6899 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6900 VT == MVT::v1f64) {
6901 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6902 return;
6903 }
6904 break;
6905 }
6906 case AArch64ISD::LD3LANEpost: {
6907 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6908 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6909 return;
6910 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6911 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6912 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6913 return;
6914 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6915 VT == MVT::v2f32) {
6916 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6917 return;
6918 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6919 VT == MVT::v1f64) {
6920 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6921 return;
6922 }
6923 break;
6924 }
6925 case AArch64ISD::LD4LANEpost: {
6926 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6927 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6928 return;
6929 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6930 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6931 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6932 return;
6933 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6934 VT == MVT::v2f32) {
6935 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6936 return;
6937 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6938 VT == MVT::v1f64) {
6939 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6940 return;
6941 }
6942 break;
6943 }
6944 case AArch64ISD::ST2post: {
6945 VT = Node->getOperand(1).getValueType();
6946 if (VT == MVT::v8i8) {
6947 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6948 return;
6949 } else if (VT == MVT::v16i8) {
6950 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6951 return;
6952 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6953 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6954 return;
6955 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6956 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6957 return;
6958 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6959 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6960 return;
6961 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6962 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6963 return;
6964 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6965 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6966 return;
6967 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6968 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6969 return;
6970 }
6971 break;
6972 }
6973 case AArch64ISD::ST3post: {
6974 VT = Node->getOperand(1).getValueType();
6975 if (VT == MVT::v8i8) {
6976 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6977 return;
6978 } else if (VT == MVT::v16i8) {
6979 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6980 return;
6981 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6982 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6983 return;
6984 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6985 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6986 return;
6987 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6988 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6989 return;
6990 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6991 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6992 return;
6993 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6994 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6995 return;
6996 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6997 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6998 return;
6999 }
7000 break;
7001 }
7002 case AArch64ISD::ST4post: {
7003 VT = Node->getOperand(1).getValueType();
7004 if (VT == MVT::v8i8) {
7005 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7006 return;
7007 } else if (VT == MVT::v16i8) {
7008 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7009 return;
7010 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7011 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7012 return;
7013 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7014 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7015 return;
7016 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7017 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7018 return;
7019 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7020 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7021 return;
7022 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7023 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7024 return;
7025 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7026 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7027 return;
7028 }
7029 break;
7030 }
7031 case AArch64ISD::ST1x2post: {
7032 VT = Node->getOperand(1).getValueType();
7033 if (VT == MVT::v8i8) {
7034 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7035 return;
7036 } else if (VT == MVT::v16i8) {
7037 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7038 return;
7039 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7040 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7041 return;
7042 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7043 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7044 return;
7045 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7046 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7047 return;
7048 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7049 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7050 return;
7051 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7052 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7053 return;
7054 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7055 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7056 return;
7057 }
7058 break;
7059 }
7060 case AArch64ISD::ST1x3post: {
7061 VT = Node->getOperand(1).getValueType();
7062 if (VT == MVT::v8i8) {
7063 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7064 return;
7065 } else if (VT == MVT::v16i8) {
7066 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7067 return;
7068 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7069 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7070 return;
7071 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7072 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7073 return;
7074 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7075 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7076 return;
7077 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7078 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7079 return;
7080 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7081 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7082 return;
7083 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7084 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7085 return;
7086 }
7087 break;
7088 }
7089 case AArch64ISD::ST1x4post: {
7090 VT = Node->getOperand(1).getValueType();
7091 if (VT == MVT::v8i8) {
7092 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7093 return;
7094 } else if (VT == MVT::v16i8) {
7095 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7096 return;
7097 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7098 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7099 return;
7100 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7101 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7102 return;
7103 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7104 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7105 return;
7106 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7107 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7108 return;
7109 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7110 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7111 return;
7112 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7113 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7114 return;
7115 }
7116 break;
7117 }
7118 case AArch64ISD::ST2LANEpost: {
7119 VT = Node->getOperand(1).getValueType();
7120 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7121 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7122 return;
7123 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7124 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7125 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7126 return;
7127 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7128 VT == MVT::v2f32) {
7129 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7130 return;
7131 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7132 VT == MVT::v1f64) {
7133 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7134 return;
7135 }
7136 break;
7137 }
7138 case AArch64ISD::ST3LANEpost: {
7139 VT = Node->getOperand(1).getValueType();
7140 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7141 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7142 return;
7143 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7144 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7145 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7146 return;
7147 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7148 VT == MVT::v2f32) {
7149 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7150 return;
7151 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7152 VT == MVT::v1f64) {
7153 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7154 return;
7155 }
7156 break;
7157 }
7158 case AArch64ISD::ST4LANEpost: {
7159 VT = Node->getOperand(1).getValueType();
7160 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7161 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7162 return;
7163 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7164 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7165 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7166 return;
7167 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7168 VT == MVT::v2f32) {
7169 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7170 return;
7171 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7172 VT == MVT::v1f64) {
7173 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7174 return;
7175 }
7176 break;
7177 }
7178 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7179 if (VT == MVT::nxv16i8) {
7180 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7181 return;
7182 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7183 VT == MVT::nxv8bf16) {
7184 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7185 return;
7186 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7187 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7188 return;
7189 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7190 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7191 return;
7192 }
7193 break;
7194 }
7195 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7196 if (VT == MVT::nxv16i8) {
7197 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7198 return;
7199 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7200 VT == MVT::nxv8bf16) {
7201 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7202 return;
7203 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7204 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7205 return;
7206 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7207 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7208 return;
7209 }
7210 break;
7211 }
7212 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7213 if (VT == MVT::nxv16i8) {
7214 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7215 return;
7216 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7217 VT == MVT::nxv8bf16) {
7218 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7219 return;
7220 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7221 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7222 return;
7223 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7224 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7225 return;
7226 }
7227 break;
7228 }
7229 }
7230
7231 // Select the default instruction
7232 SelectCode(Node);
7233}
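// Note (editorial addition, not in the source): SelectCode() hands any node
// not matched above to the matcher table that TableGen generates from the
// AArch64 instruction patterns, so the explicit cases here only cover what
// the generated rules cannot express conveniently.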
7234
7235/// createAArch64ISelDag - This pass converts a legalized DAG into an
7236/// AArch64-specific DAG, ready for instruction scheduling.
7237FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7238 CodeGenOptLevel OptLevel) {
7239 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7240}
7241
7242/// When \p PredVT is a scalable vector predicate in the form
7243/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7244/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7245/// structured vectors (NumVec > 1), the output data type is
7246/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7247/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7248/// EVT.
7249static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7250 unsigned NumVec) {
7251 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7252 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7253 return EVT();
7254
7255 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7256 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7257 return EVT();
7258
7259 ElementCount EC = PredVT.getVectorElementCount();
7260 EVT ScalarVT =
7261 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7262 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7263
7264 return MemVT;
7265}
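// Worked example (editorial addition, not in the source): for
// PredVT = nxv4i1 and NumVec = 2, EC is "vscale x 4", so
// ScalarVT = i(128 / 4) = i32 and the returned type is nxv(4 * 2)xi32,
// i.e. MVT::nxv8i32. Predicate types outside the whitelist above (for
// example nxv1i1) yield an invalid EVT instead.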
7266
7267/// Return the EVT of the data associated with a memory operation in \p
7268/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
7269static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7270 if (isa<MemSDNode>(Root))
7271 return cast<MemSDNode>(Root)->getMemoryVT();
7272
7273 if (isa<MemIntrinsicSDNode>(Root))
7274 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7275
7276 const unsigned Opcode = Root->getOpcode();
7277 // For custom ISD nodes, we have to look at them individually to extract the
7278 // type of the data moved to/from memory.
7279 switch (Opcode) {
7280 case AArch64ISD::LD1_MERGE_ZERO:
7281 case AArch64ISD::LD1S_MERGE_ZERO:
7282 case AArch64ISD::LDNF1_MERGE_ZERO:
7283 case AArch64ISD::LDNF1S_MERGE_ZERO:
7284 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7285 case AArch64ISD::ST1_PRED:
7286 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7287 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7288 return getPackedVectorTypeFromPredicateType(
7289 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7290 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7291 return getPackedVectorTypeFromPredicateType(
7292 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7293 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7294 return getPackedVectorTypeFromPredicateType(
7295 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7296 default:
7297 break;
7298 }
7299
7300 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7301 return EVT();
7302
7303 switch (Root->getConstantOperandVal(1)) {
7304 default:
7305 return EVT();
7306 case Intrinsic::aarch64_sme_ldr:
7307 case Intrinsic::aarch64_sme_str:
7308 return MVT::nxv16i8;
7309 case Intrinsic::aarch64_sve_prf:
7310 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7311 // width of the predicate.
7312 return getPackedVectorTypeFromPredicateType(
7313 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7314 case Intrinsic::aarch64_sve_ld2_sret:
7315 case Intrinsic::aarch64_sve_ld2q_sret:
7316 return getPackedVectorTypeFromPredicateType(
7317 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7318 case Intrinsic::aarch64_sve_st2q:
7319 return getPackedVectorTypeFromPredicateType(
7320 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7321 case Intrinsic::aarch64_sve_ld3_sret:
7322 case Intrinsic::aarch64_sve_ld3q_sret:
7323 return getPackedVectorTypeFromPredicateType(
7324 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7325 case Intrinsic::aarch64_sve_st3q:
7326 return getPackedVectorTypeFromPredicateType(
7327 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7328 case Intrinsic::aarch64_sve_ld4_sret:
7329 case Intrinsic::aarch64_sve_ld4q_sret:
7330 return getPackedVectorTypeFromPredicateType(
7331 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7332 case Intrinsic::aarch64_sve_st4q:
7333 return getPackedVectorTypeFromPredicateType(
7334 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7335 case Intrinsic::aarch64_sve_ld1udq:
7336 case Intrinsic::aarch64_sve_st1dq:
7337 return EVT(MVT::nxv1i64);
7338 case Intrinsic::aarch64_sve_ld1uwq:
7339 case Intrinsic::aarch64_sve_st1wq:
7340 return EVT(MVT::nxv1i32);
7341 }
7342}
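// Worked example (editorial addition, not in the source): for a call to
// llvm.aarch64.sve.ld3.sret whose governing predicate (operand 2) has type
// nxv4i1, the switch above returns
// getPackedVectorTypeFromPredicateType(Ctx, nxv4i1, /*NumVec=*/3), i.e. the
// structured memory type nxv12i32.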
7343
7344/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7345/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7346/// where Root is the memory access using N for its address.
7347template <int64_t Min, int64_t Max>
7348bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7349 SDValue &Base,
7350 SDValue &OffImm) {
7351 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7352 const DataLayout &DL = CurDAG->getDataLayout();
7353 const MachineFrameInfo &MFI = MF->getFrameInfo();
7354
7355 if (N.getOpcode() == ISD::FrameIndex) {
7356 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7357 // We can only encode VL scaled offsets, so only fold in frame indexes
7358 // referencing SVE objects.
7359 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7360 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7361 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7362 return true;
7363 }
7364
7365 return false;
7366 }
7367
7368 if (MemVT == EVT())
7369 return false;
7370
7371 if (N.getOpcode() != ISD::ADD)
7372 return false;
7373
7374 SDValue VScale = N.getOperand(1);
7375 if (VScale.getOpcode() != ISD::VSCALE)
7376 return false;
7377
7378 TypeSize TS = MemVT.getSizeInBits();
7379 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7380 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7381
7382 if ((MulImm % MemWidthBytes) != 0)
7383 return false;
7384
7385 int64_t Offset = MulImm / MemWidthBytes;
7386 if (Offset < Min || Offset > Max)
7387 return false;
7388
7389 Base = N.getOperand(0);
7390 if (Base.getOpcode() == ISD::FrameIndex) {
7391 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7392 // We can only encode VL scaled offsets, so only fold in frame indexes
7393 // referencing SVE objects.
7394 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7395 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7396 }
7397
7398 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7399 return true;
7400}
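// Worked example (editorial addition, not in the source): with
// MemVT = nxv4i32 the minimum memory width is 128 bits, so
// MemWidthBytes = 16. An address of the form (add Base, (vscale 48)) then
// folds with OffImm = 48 / 16 = 3, provided 3 lies within the [Min, Max]
// range of the template instantiation; a MulImm that is not a multiple of
// 16 leaves the add unfolded.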
7401
7402/// Select register plus register addressing mode for SVE, with scaled
7403/// offset.
7404bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7405 SDValue &Base,
7406 SDValue &Offset) {
7407 if (N.getOpcode() != ISD::ADD)
7408 return false;
7409
7410 // Process an ADD node.
7411 const SDValue LHS = N.getOperand(0);
7412 const SDValue RHS = N.getOperand(1);
7413
7414 // 8-bit data does not come with an SHL node, so it is treated
7415 // separately.
7416 if (Scale == 0) {
7417 Base = LHS;
7418 Offset = RHS;
7419 return true;
7420 }
7421
7422 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7423 int64_t ImmOff = C->getSExtValue();
7424 unsigned Size = 1 << Scale;
7425
7426 // To use the reg+reg addressing mode, the immediate must be a multiple of
7427 // the vector element's byte size.
7428 if (ImmOff % Size)
7429 return false;
7430
7431 SDLoc DL(N);
7432 Base = LHS;
7433 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7434 SDValue Ops[] = {Offset};
7435 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7436 Offset = SDValue(MI, 0);
7437 return true;
7438 }
7439
7440 // Check if the RHS is a shift node with a constant.
7441 if (RHS.getOpcode() != ISD::SHL)
7442 return false;
7443
7444 const SDValue ShiftRHS = RHS.getOperand(1);
7445 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7446 if (C->getZExtValue() == Scale) {
7447 Base = LHS;
7448 Offset = RHS.getOperand(0);
7449 return true;
7450 }
7451
7452 return false;
7453}
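// Worked example (editorial addition, not in the source): for Scale = 2
// (32-bit elements), an address (add Base, (shl Index, 2)) matches the
// final pattern and selects the scaled reg+reg form [Base, Index, lsl #2].
// A constant right-hand side such as 20 is also accepted: 20 is a multiple
// of 4, so 20 >> 2 = 5 is materialised with MOVi64imm and used as the
// scaled index register.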
7454
7455bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7456 const AArch64TargetLowering *TLI =
7457 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7458
7459 return TLI->isAllActivePredicate(*CurDAG, N);
7460}
7461
7462bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7463 EVT VT = N.getValueType();
7464 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7465}
7466
7467bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7468 SDValue &Base, SDValue &Offset,
7469 unsigned Scale) {
7470 // Try to untangle an ADD node into a 'reg + offset'
7471 if (CurDAG->isBaseWithConstantOffset(N))
7472 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7473 int64_t ImmOff = C->getSExtValue();
7474 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7475 Base = N.getOperand(0);
7476 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7477 return true;
7478 }
7479 }
7480
7481 // By default, just match reg + 0.
7482 Base = N;
7483 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7484 return true;
7485}
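// Worked example (editorial addition, not in the source): assuming the
// caller passes MaxSize = 15 and Scale = 1, a slice address (add Base, 7)
// is split into Base plus the immediate 7; an index that is not positive,
// not a multiple of Scale, or larger than MaxSize falls back to the
// reg + 0 form at the end of the function.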
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:204
support::ulittle16_t & Hi
Definition: aarch32.cpp:203
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1479
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1434
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:456
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
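A standalone sketch of the AArch64_AM helpers above (the immediate and shift values are illustrative, not taken from this file):
  uint64_t Imm = 0x00ff00ff00ff00ffULL; // a pattern AArch64 logical instructions can encode
  uint64_t Encoding;
  if (AArch64_AM::isLogicalImmediate(Imm, 64) &&
      AArch64_AM::processLogicalImmediate(Imm, 64, Encoding)) {
    // Round trip: decoding the N:immr:imms encoding restores the original pattern.
    assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Imm);
  }
  // A shifted-register operand packs the shift kind and amount into one immediate.
  unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 4);
  assert(AArch64_AM::getShiftType(ShiftImm) == AArch64_AM::LSL &&
         AArch64_AM::getShiftValue(ShiftImm) == 4);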
static constexpr unsigned SVEBitsPerBlock
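SVEBitsPerBlock is the 128-bit granule of an SVE scalable vector. A short worked example (the element type is arbitrary): <vscale x 4 x i32> holds 128 / 32 = 4 elements per block, i.e. 4 * vscale elements at run time.
  unsigned ElemBits = 32;
  unsigned ElemsPerBlock = AArch64::SVEBitsPerBlock / ElemBits; // 128 / 32 == 4
  (void)ElemsPerBlock;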
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
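A hedged sketch of how a selector typically dispatches on these opcodes (assumes an SDNode *Node in scope; only the general shape is shown):
  switch (Node->getOpcode()) {
  case ISD::INTRINSIC_WO_CHAIN: {
    // No chain: operand 0 is the intrinsic ID.
    unsigned IntNo = Node->getConstantOperandVal(0);
    (void)IntNo;
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Operand 0 is the input chain, so the intrinsic ID is operand 1.
    unsigned IntNo = Node->getConstantOperandVal(1);
    (void)IntNo;
    break;
  }
  default:
    break;
  }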
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1562
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1593
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:279
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
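A standalone illustration (not lifted from this file) of how a shifted mask is fully described by its start bit and run length, the shape that bitfield-extract patterns look for:
  uint64_t Mask = 0x00000FF000000000ULL; // 0xFF << 36
  if (isShiftedMask_64(Mask)) {
    unsigned Lsb = llvm::countr_zero(Mask);         // 36: index of the first set bit
    unsigned Width = llvm::countr_one(Mask >> Lsb); // 8: length of the run of ones
    (void)Lsb; (void)Width;
  }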
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
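A brief sketch of building and querying a scalable vector EVT with the accessors above (assumes an LLVMContext &Ctx; illustrative only):
  EVT NXV4I32 = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true);
  bool Scalable = NXV4I32.isScalableVector();            // true
  unsigned MinElts = NXV4I32.getVectorMinNumElements();  // 4
  uint64_t EltBits = NXV4I32.getScalarSizeInBits();      // 32
  TypeSize Size = NXV4I32.getSizeInBits();               // 128 bits, scalable
  (void)Scalable; (void)MinElts; (void)EltBits; (void)Size;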
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43