//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 AArch64DAGToDAGISel() = delete;
48
49 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50 CodeGenOptLevel OptLevel)
51 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52
  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }
57
58 void Select(SDNode *Node) override;
59
  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;
65
66 template <signed Low, signed High, signed Scale>
67 bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74 return SelectShiftedRegister(N, false, Reg, Shift);
75 }
76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77 return SelectShiftedRegister(N, true, Reg, Shift);
78 }
79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81 }
82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84 }
85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87 }
88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93 }
94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96 }
97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed(N, 1, Base, OffImm);
102 }
103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexed(N, 2, Base, OffImm);
105 }
106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexed(N, 4, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 8, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 16, Base, OffImm);
114 }
115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117 }
118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120 }
121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129 }
130 template <unsigned Size, unsigned Max>
131 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132 // Test if there is an appropriate addressing mode and check if the
133 // immediate fits.
134 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135 if (Found) {
136 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137 int64_t C = CI->getSExtValue();
138 if (C <= Max)
139 return true;
140 }
141 }
142
143 // Otherwise, base only, materialize address in register.
144 Base = N;
145 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146 return true;
147 }
148
149 template<int Width>
150 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151 SDValue &SignExtend, SDValue &DoShift) {
152 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153 }
154
155 template<int Width>
156 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157 SDValue &SignExtend, SDValue &DoShift) {
158 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159 }
160
161 bool SelectExtractHigh(SDValue N, SDValue &Res) {
162 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163 N = N->getOperand(0);
164 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165 !isa<ConstantSDNode>(N->getOperand(1)))
166 return false;
167 EVT VT = N->getValueType(0);
168 EVT LVT = N->getOperand(0).getValueType();
169 unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
173 Res = N->getOperand(0);
174 return true;
175 }
176
177 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178 if (N.getOpcode() != AArch64ISD::VLSHR)
179 return false;
180 SDValue Op = N->getOperand(0);
181 EVT VT = Op.getValueType();
182 unsigned ShtAmt = N->getConstantOperandVal(1);
183 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184 return false;
185
186 APInt Imm;
187 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188 Imm = APInt(VT.getScalarSizeInBits(),
189 Op.getOperand(1).getConstantOperandVal(0)
190 << Op.getOperand(1).getConstantOperandVal(1));
191 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0));
195 else
196 return false;
197
198 if (Imm != 1ULL << (ShtAmt - 1))
199 return false;
200
201 Res1 = Op.getOperand(0);
202 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203 return true;
204 }
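  // Illustrative example (not part of the original source): for a v8i16
  // operand, (X + splat(8)) >>u 4 matches here because 8 == 1 << (4 - 1),
  // i.e. the add supplies exactly the rounding bias for a shift by 4, so the
  // node can be selected as a rounding shift (URSHR/RSHRN-style patterns).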
205
206 bool SelectDupZeroOrUndef(SDValue N) {
207 switch(N->getOpcode()) {
208 case ISD::UNDEF:
209 return true;
210 case AArch64ISD::DUP:
211 case ISD::SPLAT_VECTOR: {
212 auto Opnd0 = N->getOperand(0);
213 if (isNullConstant(Opnd0))
214 return true;
215 if (isNullFPConstant(Opnd0))
216 return true;
217 break;
218 }
219 default:
220 break;
221 }
222
223 return false;
224 }
225
226 bool SelectDupZero(SDValue N) {
227 switch(N->getOpcode()) {
228 case AArch64ISD::DUP:
229 case ISD::SPLAT_VECTOR: {
230 auto Opnd0 = N->getOperand(0);
231 if (isNullConstant(Opnd0))
232 return true;
233 if (isNullFPConstant(Opnd0))
234 return true;
235 break;
236 }
237 }
238
239 return false;
240 }
241
242 template<MVT::SimpleValueType VT>
243 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
244 return SelectSVEAddSubImm(N, VT, Imm, Shift);
245 }
246
247 template <MVT::SimpleValueType VT, bool Negate>
248 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
249 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
250 }
251
252 template <MVT::SimpleValueType VT>
253 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVECpyDupImm(N, VT, Imm, Shift);
255 }
256
257 template <MVT::SimpleValueType VT, bool Invert = false>
258 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
259 return SelectSVELogicalImm(N, VT, Imm, Invert);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
264 return SelectSVEArithImm(N, VT, Imm);
265 }
266
267 template <unsigned Low, unsigned High, bool AllowSaturation = false>
268 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
269 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
270 }
271
272 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
273 if (N->getOpcode() != ISD::SPLAT_VECTOR)
274 return false;
275
276 EVT EltVT = N->getValueType(0).getVectorElementType();
277 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
278 /* High */ EltVT.getFixedSizeInBits(),
279 /* AllowSaturation */ true, Imm);
280 }
281
282 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
283 template<signed Min, signed Max, signed Scale, bool Shift>
284 bool SelectCntImm(SDValue N, SDValue &Imm) {
285 if (!isa<ConstantSDNode>(N))
286 return false;
287
288 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
289 if (Shift)
290 MulImm = 1LL << MulImm;
291
292 if ((MulImm % std::abs(Scale)) != 0)
293 return false;
294
295 MulImm /= Scale;
296 if ((MulImm >= Min) && (MulImm <= Max)) {
297 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
298 return true;
299 }
300
301 return false;
302 }
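  // Illustrative example (not part of the original source): with Min=1,
  // Max=16, Scale=8 and Shift=false, a constant of 32 divides evenly by 8 and
  // yields MulImm=4, which is in range, so Imm becomes the target constant 4;
  // a constant of 20 is rejected because it is not a multiple of 8.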
303
304 template <signed Max, signed Scale>
305 bool SelectEXTImm(SDValue N, SDValue &Imm) {
306 if (!isa<ConstantSDNode>(N))
307 return false;
308
309 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
310
311 if (MulImm >= 0 && MulImm <= Max) {
312 MulImm *= Scale;
313 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
314 return true;
315 }
316
317 return false;
318 }
319
320 template <unsigned BaseReg, unsigned Max>
321 bool ImmToReg(SDValue N, SDValue &Imm) {
322 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
323 uint64_t C = CI->getZExtValue();
324
325 if (C > Max)
326 return false;
327
328 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
329 return true;
330 }
331 return false;
332 }
333
  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  // Form a sequence of SVE registers for instructions using a list of vectors,
  // e.g. structured loads and stores (ldN, stN).
342 SDValue createZTuple(ArrayRef<SDValue> Vecs);
343
344 // Similar to above, except the register must start at a multiple of the
345 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
346 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
347
348 /// Generic helper for the createDTuple/createQTuple
349 /// functions. Those should almost always be called instead.
350 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
351 const unsigned SubRegs[]);
352
353 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
354
355 bool tryIndexedLoad(SDNode *N);
356
357 void SelectPtrauthAuth(SDNode *N);
358 void SelectPtrauthResign(SDNode *N);
359
360 bool trySelectStackSlotTagP(SDNode *N);
361 void SelectTagP(SDNode *N);
362
363 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
364 unsigned SubRegIdx);
365 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
366 unsigned SubRegIdx);
367 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
368 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
369 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
370 unsigned Opc_rr, unsigned Opc_ri,
371 bool IsIntr = false);
372 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
373 unsigned Scale, unsigned Opc_ri,
374 unsigned Opc_rr);
375 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
376 bool IsZmMulti, unsigned Opcode,
377 bool HasPred = false);
378 void SelectPExtPair(SDNode *N, unsigned Opc);
379 void SelectWhilePair(SDNode *N, unsigned Opc);
380 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
381 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
382 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
383 bool IsTupleInput, unsigned Opc);
384 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
385
386 template <unsigned MaxIdx, unsigned Scale>
387 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
388 unsigned Op);
389 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
390 unsigned Op, unsigned MaxIdx, unsigned Scale,
391 unsigned BaseReg = 0);
392 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
393 /// SVE Reg+Imm addressing mode.
394 template <int64_t Min, int64_t Max>
395 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
396 SDValue &OffImm);
397 /// SVE Reg+Reg address mode.
398 template <unsigned Scale>
399 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
400 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
401 }
402
403 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
404 uint32_t MaxImm);
405
406 template <unsigned MaxIdx, unsigned Scale>
407 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
408 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
409 }
410
411 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
412 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
413 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
414 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
415 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
416 unsigned Opc_rr, unsigned Opc_ri);
417 std::tuple<unsigned, SDValue, SDValue>
418 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
419 const SDValue &OldBase, const SDValue &OldOffset,
420 unsigned Scale);
421
422 bool tryBitfieldExtractOp(SDNode *N);
423 bool tryBitfieldExtractOpFromSExt(SDNode *N);
424 bool tryBitfieldInsertOp(SDNode *N);
425 bool tryBitfieldInsertInZeroOp(SDNode *N);
426 bool tryShiftAmountMod(SDNode *N);
427
428 bool tryReadRegister(SDNode *N);
429 bool tryWriteRegister(SDNode *N);
430
431 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
432 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
433
434 bool trySelectXAR(SDNode *N);
435
436// Include the pieces autogenerated from the target description.
437#include "AArch64GenDAGISel.inc"
438
439private:
440 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
441 SDValue &Shift);
442 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
443 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
444 SDValue &OffImm) {
445 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
446 }
447 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
448 unsigned Size, SDValue &Base,
449 SDValue &OffImm);
450 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
451 SDValue &OffImm);
452 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm);
454 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
455 SDValue &Offset, SDValue &SignExtend,
456 SDValue &DoShift);
457 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
458 SDValue &Offset, SDValue &SignExtend,
459 SDValue &DoShift);
460 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
461 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
462 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
463 SDValue &Offset, SDValue &SignExtend);
464
465 template<unsigned RegWidth>
466 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
467 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
468 }
469
470 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
471
472 template<unsigned RegWidth>
473 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
474 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
475 }
476
477 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
478 unsigned Width);
479
480 bool SelectCMP_SWAP(SDNode *N);
481
482 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
483 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
484 bool Negate);
485 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
486 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
487
488 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
489 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
490 bool AllowSaturation, SDValue &Imm);
491
492 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
493 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
494 SDValue &Offset);
495 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
496 SDValue &Offset, unsigned Scale = 1);
497
498 bool SelectAllActivePredicate(SDValue N);
499 bool SelectAnyPredicate(SDValue N);
500};
501
502class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
503public:
504 static char ID;
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
509};
510} // end anonymous namespace
511
512char AArch64DAGToDAGISelLegacy::ID = 0;
513
514INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
515
/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the value.
518static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
519 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
520 Imm = C->getZExtValue();
521 return true;
522 }
523 return false;
524}
525
// isIntImmediate - This method tests to see if N is a constant operand.
// If so, Imm will receive the value.
528static bool isIntImmediate(SDValue N, uint64_t &Imm) {
529 return isIntImmediate(N.getNode(), Imm);
530}
531
// isOpcWithIntImmediate - This method tests to see if the node has a specific
// opcode and an immediate integer right operand.
// If so, Imm will receive the immediate value.
535static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
536 uint64_t &Imm) {
537 return N->getOpcode() == Opc &&
538 isIntImmediate(N->getOperand(1).getNode(), Imm);
539}
540
541// isIntImmediateEq - This method tests to see if N is a constant operand that
542// is equivalent to 'ImmExpected'.
543#ifndef NDEBUG
544static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
545 uint64_t Imm;
546 if (!isIntImmediate(N.getNode(), Imm))
547 return false;
548 return Imm == ImmExpected;
549}
550#endif
551
552bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
553 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
554 std::vector<SDValue> &OutOps) {
555 switch(ConstraintID) {
556 default:
557 llvm_unreachable("Unexpected asm memory constraint");
558 case InlineAsm::ConstraintCode::m:
559 case InlineAsm::ConstraintCode::o:
560 case InlineAsm::ConstraintCode::Q:
561 // We need to make sure that this one operand does not end up in XZR, thus
562 // require the address to be in a PointerRegClass register.
563 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
564 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
565 SDLoc dl(Op);
566 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
567 SDValue NewOp =
568 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
569 dl, Op.getValueType(),
570 Op, RC), 0);
571 OutOps.push_back(NewOp);
572 return false;
573 }
574 return true;
575}
576
577/// SelectArithImmed - Select an immediate value that can be represented as
578/// a 12-bit value shifted left by either 0 or 12. If so, return true with
579/// Val set to the 12-bit value and Shift set to the shifter operand.
580bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
581 SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
587 if (!isa<ConstantSDNode>(N.getNode()))
588 return false;
589
590 uint64_t Immed = N.getNode()->getAsZExtVal();
591 unsigned ShiftAmt;
592
593 if (Immed >> 12 == 0) {
594 ShiftAmt = 0;
595 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
596 ShiftAmt = 12;
597 Immed = Immed >> 12;
598 } else
599 return false;
600
601 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
602 SDLoc dl(N);
603 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
604 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
605 return true;
606}
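// Illustrative examples (not part of the original source): 0xabc is accepted
// as Val=0xabc with Shift="LSL #0"; 0xabc000 is accepted as Val=0xabc with
// Shift="LSL #12"; 0xabc00 is rejected because it is neither a plain 12-bit
// value nor a 12-bit value shifted left by exactly 12.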
607
608/// SelectNegArithImmed - As above, but negates the value before trying to
609/// select it.
610bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
611 SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
617 if (!isa<ConstantSDNode>(N.getNode()))
618 return false;
619
620 // The immediate operand must be a 24-bit zero-extended immediate.
621 uint64_t Immed = N.getNode()->getAsZExtVal();
622
623 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
624 // have the opposite effect on the C flag, so this pattern mustn't match under
625 // those circumstances.
626 if (Immed == 0)
627 return false;
628
629 if (N.getValueType() == MVT::i32)
630 Immed = ~((uint32_t)Immed) + 1;
631 else
632 Immed = ~Immed + 1ULL;
633 if (Immed & 0xFFFFFFFFFF000000ULL)
634 return false;
635
636 Immed &= 0xFFFFFFULL;
637 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
638 Shift);
639}
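// Illustrative example (not part of the original source): an i32 constant of
// -5 negates to 5, which SelectArithImmed accepts as Val=5, Shift="LSL #0";
// this is what allows an ADD of -5 to be selected as a SUB of 5 (and vice
// versa). A value of 0 is rejected up front because negating it would flip
// the meaning of the C flag for cmp/cmn.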
640
/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
648 return AArch64_AM::LSL;
649 case ISD::SRL:
650 return AArch64_AM::LSR;
651 case ISD::SRA:
652 return AArch64_AM::ASR;
653 case ISD::ROTR:
654 return AArch64_AM::ROR;
655 }
656}
657
/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
662 // It is worth folding logical shift of up to three places.
663 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
664 if (!CSD)
665 return false;
666 unsigned ShiftVal = CSD->getZExtValue();
667 if (ShiftVal > 3)
668 return false;
669
670 // Check if this particular node is reused in any non-memory related
671 // operation. If yes, do not try to fold this node into the address
672 // computation, since the computation will be kept.
673 const SDNode *Node = V.getNode();
674 for (SDNode *UI : Node->uses())
675 if (!isa<MemSDNode>(*UI))
676 for (SDNode *UII : UI->uses())
677 if (!isa<MemSDNode>(*UII))
678 return false;
679 return true;
680}
681
/// Determine whether it is worth folding V into an extended register
/// addressing mode.
684bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
685 // Trivial if we are optimizing for code size or if there is only
686 // one use of the value.
687 if (CurDAG->shouldOptForSize() || V.hasOneUse())
688 return true;
689
690 // If a subtarget has a slow shift, folding a shift into multiple loads
691 // costs additional micro-ops.
692 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
693 return false;
694
695 // Check whether we're going to emit the address arithmetic anyway because
696 // it's used by a non-address operation.
697 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
698 return true;
699 if (V.getOpcode() == ISD::ADD) {
700 const SDValue LHS = V.getOperand(0);
701 const SDValue RHS = V.getOperand(1);
702 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
703 return true;
704 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
705 return true;
706 }
707
708 // It hurts otherwise, since the value will be reused.
709 return false;
710}
711
/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted register patterns.
714bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
715 SDValue &Shift) {
716 EVT VT = N.getValueType();
717 if (VT != MVT::i32 && VT != MVT::i64)
718 return false;
719
720 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
721 return false;
722 SDValue LHS = N.getOperand(0);
723 if (!LHS->hasOneUse())
724 return false;
725
726 unsigned LHSOpcode = LHS->getOpcode();
727 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
728 return false;
729
730 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
731 if (!ShiftAmtNode)
732 return false;
733
734 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
735 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
736 if (!RHSC)
737 return false;
738
739 APInt AndMask = RHSC->getAPIntValue();
740 unsigned LowZBits, MaskLen;
741 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
742 return false;
743
744 unsigned BitWidth = N.getValueSizeInBits();
745 SDLoc DL(LHS);
746 uint64_t NewShiftC;
747 unsigned NewShiftOp;
748 if (LHSOpcode == ISD::SHL) {
749 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
750 // BitWidth != LowZBits + MaskLen doesn't match the pattern
751 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
752 return false;
753
754 NewShiftC = LowZBits - ShiftAmtC;
755 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
756 } else {
757 if (LowZBits == 0)
758 return false;
759
760 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
761 NewShiftC = LowZBits + ShiftAmtC;
762 if (NewShiftC >= BitWidth)
763 return false;
764
    // SRA needs all of the high bits
766 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
767 return false;
768
769 // SRL high bits can be 0 or 1
770 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
771 return false;
772
773 if (LHSOpcode == ISD::SRL)
774 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
775 else
776 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
777 }
778
779 assert(NewShiftC < BitWidth && "Invalid shift amount");
780 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
781 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
782 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
783 NewShiftAmt, BitWidthMinus1),
784 0);
785 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
786 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
787 return true;
788}
789
/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
794 if (N.getOpcode() == ISD::SIGN_EXTEND ||
795 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
796 EVT SrcVT;
797 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
798 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
799 else
800 SrcVT = N.getOperand(0).getValueType();
801
802 if (!IsLoadStore && SrcVT == MVT::i8)
803 return AArch64_AM::SXTB;
804 else if (!IsLoadStore && SrcVT == MVT::i16)
805 return AArch64_AM::SXTH;
806 else if (SrcVT == MVT::i32)
807 return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
812 N.getOpcode() == ISD::ANY_EXTEND) {
813 EVT SrcVT = N.getOperand(0).getValueType();
814 if (!IsLoadStore && SrcVT == MVT::i8)
815 return AArch64_AM::UXTB;
816 else if (!IsLoadStore && SrcVT == MVT::i16)
817 return AArch64_AM::UXTH;
818 else if (SrcVT == MVT::i32)
819 return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();
828
829 switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
833 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
834 case 0xFFFF:
835 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
836 case 0xFFFFFFFF:
837 return AArch64_AM::UXTW;
838 }
  }

  return AArch64_AM::InvalidShiftExtend;
}
843
/// Determine whether it is worth folding V into an extended register of an
/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
848bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
849 // Trivial if we are optimizing for code size or if there is only
850 // one use of the value.
851 if (CurDAG->shouldOptForSize() || V.hasOneUse())
852 return true;
853
854 // If a subtarget has a fastpath LSL we can fold a logical shift into
855 // the add/sub and save a cycle.
856 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;
860
861 // It hurts otherwise, since the value will be reused.
862 return false;
863}
864
865/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to the default of "LSL 0". The logical
867/// instructions allow the shifted register to be rotated, but the arithmetic
868/// instructions do not. The AllowROR parameter specifies whether ROR is
869/// supported.
870bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
871 SDValue &Reg, SDValue &Shift) {
872 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
877 return false;
878 if (!AllowROR && ShType == AArch64_AM::ROR)
879 return false;
880
881 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
882 unsigned BitSize = N.getValueSizeInBits();
883 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
884 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
885
886 Reg = N.getOperand(0);
887 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
888 return isWorthFoldingALU(N, true);
889 }
890
891 return false;
892}
893
894/// Instructions that accept extend modifiers like UXTW expect the register
895/// being extended to be a GPR32, but the incoming DAG might be acting on a
896/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
900 return N;
901
902 SDLoc dl(N);
903 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
904}
905
906// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
907template<signed Low, signed High, signed Scale>
908bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
909 if (!isa<ConstantSDNode>(N))
910 return false;
911
912 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
913 if ((MulImm % std::abs(Scale)) == 0) {
914 int64_t RDVLImm = MulImm / Scale;
915 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
916 Imm = CurDAG->getSignedConstant(RDVLImm, SDLoc(N), MVT::i32,
917 /*isTarget=*/true);
918 return true;
919 }
920 }
921
922 return false;
923}
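// Illustrative example (not part of the original source): with Low=-32,
// High=31 and Scale=16, a request for VSCALE*64 divides evenly by 16 and
// yields an RDVL-style multiplier of 4; VSCALE*40 is rejected because 40 is
// not a multiple of 16, and VSCALE*1024 is rejected because 1024/16 == 64 is
// outside [-32, 31].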
924
/// SelectArithExtendedRegister - Select an "extended register" operand. This
926/// operand folds in an extend followed by an optional left shift.
927bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
928 SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
933 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
934 if (!CSD)
935 return false;
936 ShiftVal = CSD->getZExtValue();
937 if (ShiftVal > 4)
938 return false;
939
    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;
943
944 Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;
949
950 Reg = N.getOperand(0);
951
952 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
953 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
954 auto isDef32 = [](SDValue N) {
955 unsigned Opc = N.getOpcode();
956 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
957 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
958 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
959 Opc != ISD::FREEZE;
960 };
961 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
962 isDef32(Reg))
963 return false;
964 }
965
966 // AArch64 mandates that the RHS of the operation must use the smallest
967 // register class that could contain the size being extended from. Thus,
968 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
969 // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
971 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
972 Reg = narrowIfNeeded(CurDAG, Reg);
973 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
974 MVT::i32);
975 return isWorthFoldingALU(N);
976}
977
/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
/// operand is referred to by instructions that have an SP operand.
980bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
981 SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
986 return false;
987
988 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
989 if (!CSD)
990 return false;
991 ShiftVal = CSD->getZExtValue();
992 if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
997 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
998 MVT::i32);
999 return isWorthFoldingALU(N);
1000}
1001
1002/// If there's a use of this ADDlow that's not itself a load/store then we'll
1003/// need to create a real ADD instruction from it anyway and there's no point in
1004/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1005/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
1009 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1010 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1011 Use->getOpcode() != ISD::ATOMIC_STORE)
1012 return false;
1013
1014 // ldar and stlr have much more restrictive addressing modes (just a
1015 // register).
1016 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1017 return false;
1018 }
1019
1020 return true;
1021}
1022
1023/// Check if the immediate offset is valid as a scaled immediate.
1024static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1025 unsigned Size) {
1026 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1027 Offset < (Range << Log2_32(Size)))
1028 return true;
1029 return false;
1030}
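// Illustrative examples (not part of the original source): with Range=0x1000
// and Size=8 (a 64-bit access), valid offsets are 0, 8, 16, ... up to 32760
// (0xfff * 8); an offset of 12 fails the alignment check, and 32768 falls
// outside the 12-bit scaled range.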
1031
1032/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1033/// immediate" address. The "Size" argument is the size in bytes of the memory
1034/// reference, which determines the scale.
1035bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1036 unsigned BW, unsigned Size,
1037 SDValue &Base,
1038 SDValue &OffImm) {
1039 SDLoc dl(N);
1040 const DataLayout &DL = CurDAG->getDataLayout();
1041 const TargetLowering *TLI = getTargetLowering();
1042 if (N.getOpcode() == ISD::FrameIndex) {
1043 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1044 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1045 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1046 return true;
1047 }
1048
  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
1051 if (CurDAG->isBaseWithConstantOffset(N)) {
1052 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1053 if (IsSignedImm) {
1054 int64_t RHSC = RHS->getSExtValue();
1055 unsigned Scale = Log2_32(Size);
1056 int64_t Range = 0x1LL << (BW - 1);
1057
1058 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1059 RHSC < (Range << Scale)) {
1060 Base = N.getOperand(0);
1061 if (Base.getOpcode() == ISD::FrameIndex) {
1062 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1063 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1064 }
1065 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1066 return true;
1067 }
1068 } else {
1069 // unsigned Immediate
1070 uint64_t RHSC = RHS->getZExtValue();
1071 unsigned Scale = Log2_32(Size);
1072 uint64_t Range = 0x1ULL << BW;
1073
1074 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1075 Base = N.getOperand(0);
1076 if (Base.getOpcode() == ISD::FrameIndex) {
1077 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1078 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1079 }
1080 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1081 return true;
1082 }
1083 }
1084 }
1085 }
1086 // Base only. The address will be materialized into a register before
1087 // the memory is accessed.
1088 // add x0, Xbase, #offset
1089 // stp x1, x2, [x0]
1090 Base = N;
1091 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1092 return true;
1093}
1094
1095/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1096/// immediate" address. The "Size" argument is the size in bytes of the memory
1097/// reference, which determines the scale.
1098bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1099 SDValue &Base, SDValue &OffImm) {
1100 SDLoc dl(N);
1101 const DataLayout &DL = CurDAG->getDataLayout();
1102 const TargetLowering *TLI = getTargetLowering();
1103 if (N.getOpcode() == ISD::FrameIndex) {
1104 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1105 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1106 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1107 return true;
1108 }
1109
1110 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1111 GlobalAddressSDNode *GAN =
1112 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1113 Base = N.getOperand(0);
1114 OffImm = N.getOperand(1);
1115 if (!GAN)
1116 return true;
1117
    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
1121 }
1122
1123 if (CurDAG->isBaseWithConstantOffset(N)) {
1124 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1125 int64_t RHSC = (int64_t)RHS->getZExtValue();
1126 unsigned Scale = Log2_32(Size);
1127 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1128 Base = N.getOperand(0);
1129 if (Base.getOpcode() == ISD::FrameIndex) {
1130 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1131 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1132 }
1133 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1134 return true;
1135 }
1136 }
1137 }
1138
1139 // Before falling back to our general case, check if the unscaled
1140 // instructions can handle this. If so, that's preferable.
1141 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1142 return false;
1143
1144 // Base only. The address will be materialized into a register before
1145 // the memory is accessed.
1146 // add x0, Xbase, #offset
1147 // ldr x0, [x0]
1148 Base = N;
1149 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1150 return true;
1151}
1152
1153/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1154/// immediate" address. This should only match when there is an offset that
1155/// is not valid for a scaled immediate addressing mode. The "Size" argument
1156/// is the size in bytes of the memory reference, which is needed here to know
1157/// what is valid for a scaled immediate.
1158bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1159 SDValue &Base,
1160 SDValue &OffImm) {
1161 if (!CurDAG->isBaseWithConstantOffset(N))
1162 return false;
1163 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1164 int64_t RHSC = RHS->getSExtValue();
1165 if (RHSC >= -256 && RHSC < 256) {
1166 Base = N.getOperand(0);
1167 if (Base.getOpcode() == ISD::FrameIndex) {
1168 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1169 const TargetLowering *TLI = getTargetLowering();
1170 Base = CurDAG->getTargetFrameIndex(
1171 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1172 }
1173 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1174 return true;
1175 }
1176 }
1177 return false;
1178}
1179
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
1182 SDValue ImpDef = SDValue(
1183 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1184 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1185 N);
1186}
1187
1188/// Check if the given SHL node (\p N), can be used to form an
1189/// extended register for an addressing mode.
1190bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1191 bool WantExtend, SDValue &Offset,
1192 SDValue &SignExtend) {
1193 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1194 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1195 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1196 return false;
1197
1198 SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;
1204
1205 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1206 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1207 MVT::i32);
1208 } else {
1209 Offset = N.getOperand(0);
1210 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1211 }
1212
1213 unsigned LegalShiftVal = Log2_32(Size);
1214 unsigned ShiftVal = CSD->getZExtValue();
1215
1216 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1217 return false;
1218
1219 return isWorthFoldingAddr(N, Size);
1220}
1221
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
1226 if (N.getOpcode() != ISD::ADD)
1227 return false;
1228 SDValue LHS = N.getOperand(0);
1229 SDValue RHS = N.getOperand(1);
1230 SDLoc dl(N);
1231
1232 // We don't want to match immediate adds here, because they are better lowered
1233 // to the register-immediate addressing modes.
1234 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1235 return false;
1236
1237 // Check if this particular node is reused in any non-memory related
1238 // operation. If yes, do not try to fold this node into the address
1239 // computation, since the computation will be kept.
1240 const SDNode *Node = N.getNode();
1241 for (SDNode *UI : Node->uses()) {
1242 if (!isa<MemSDNode>(*UI))
1243 return false;
1244 }
1245
1246 // Remember if it is worth folding N when it produces extended register.
1247 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1248
1249 // Try to match a shifted extend on the RHS.
1250 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1251 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1252 Base = LHS;
1253 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1254 return true;
1255 }
1256
1257 // Try to match a shifted extend on the LHS.
1258 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1259 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1260 Base = RHS;
1261 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1262 return true;
1263 }
1264
1265 // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
1274 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1275 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1276 MVT::i32);
1277 if (isWorthFoldingAddr(LHS, Size))
1278 return true;
1279 }
1280
1281 // Try to match an unshifted extend on the RHS.
1282 if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
1286 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1287 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1288 MVT::i32);
1289 if (isWorthFoldingAddr(RHS, Size))
1290 return true;
1291 }
1292
1293 return false;
1294}
1295
1296// Check if the given immediate is preferred by ADD. If an immediate can be
1297// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1298// encoded by one MOVZ, return true.
1299static bool isPreferredADD(int64_t ImmOff) {
1300 // Constant in [0x0, 0xfff] can be encoded in ADD.
1301 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1302 return true;
1303 // Check if it can be encoded in an "ADD LSL #12".
1304 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD ... LSL #12", ignore such constants.
1306 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1307 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1308 return false;
1309}
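// Illustrative examples (not part of the original source): 0x123 is preferred
// (it fits the plain 12-bit ADD immediate); 0x456000 is preferred (it needs
// "ADD ... LSL #12" and cannot be built with a single MOVZ); 0x450000 is not
// preferred, since a single "MOVZ #0x45, LSL #16" already materializes it.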
1310
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
1315 if (N.getOpcode() != ISD::ADD)
1316 return false;
1317 SDValue LHS = N.getOperand(0);
1318 SDValue RHS = N.getOperand(1);
1319 SDLoc DL(N);
1320
1321 // Check if this particular node is reused in any non-memory related
1322 // operation. If yes, do not try to fold this node into the address
1323 // computation, since the computation will be kept.
1324 const SDNode *Node = N.getNode();
1325 for (SDNode *UI : Node->uses()) {
1326 if (!isa<MemSDNode>(*UI))
1327 return false;
1328 }
1329
  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. Instead it will use the [BaseReg + 0] address mode and generate
1333 // instructions like:
1334 // MOV X0, WideImmediate
1335 // ADD X1, BaseReg, X0
1336 // LDR X2, [X1, 0]
1337 // For such situation, using [BaseReg, XReg] addressing mode can save one
1338 // ADD/SUB:
1339 // MOV X0, WideImmediate
1340 // LDR X2, [BaseReg, X0]
1341 if (isa<ConstantSDNode>(RHS)) {
1342 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip immediates that can be selected by a load/store addressing mode.
    // Also skip immediates that can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
1346 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1347 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1348 return false;
1349
1350 SDValue Ops[] = { RHS };
1351 SDNode *MOVI =
1352 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1353 SDValue MOVIV = SDValue(MOVI, 0);
1354 // This ADD of two X register will be selected into [Reg+Reg] mode.
1355 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1356 }
1357
1358 // Remember if it is worth folding N when it produces extended register.
1359 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1360
1361 // Try to match a shifted extend on the RHS.
1362 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1363 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1364 Base = LHS;
1365 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1366 return true;
1367 }
1368
1369 // Try to match a shifted extend on the LHS.
1370 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1371 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1372 Base = RHS;
1373 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1374 return true;
1375 }
1376
1377 // Match any non-shifted, non-extend, non-immediate add expression.
1378 Base = LHS;
1379 Offset = RHS;
1380 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1381 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1382 // Reg1 + Reg2 is free: no check needed.
1383 return true;
1384}
1385
1386SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1387 static const unsigned RegClassIDs[] = {
1388 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1389 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1390 AArch64::dsub2, AArch64::dsub3};
1391
1392 return createTuple(Regs, RegClassIDs, SubRegs);
1393}
1394
1395SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1396 static const unsigned RegClassIDs[] = {
1397 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1398 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1399 AArch64::qsub2, AArch64::qsub3};
1400
1401 return createTuple(Regs, RegClassIDs, SubRegs);
1402}
1403
1404SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1405 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1406 AArch64::ZPR3RegClassID,
1407 AArch64::ZPR4RegClassID};
1408 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1409 AArch64::zsub2, AArch64::zsub3};
1410
1411 return createTuple(Regs, RegClassIDs, SubRegs);
1412}
1413
1414SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1415 assert(Regs.size() == 2 || Regs.size() == 4);
1416
1417 // The createTuple interface requires 3 RegClassIDs for each possible
1418 // tuple type even though we only have them for ZPR2 and ZPR4.
1419 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1420 AArch64::ZPR4Mul4RegClassID};
1421 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1422 AArch64::zsub2, AArch64::zsub3};
1423 return createTuple(Regs, RegClassIDs, SubRegs);
1424}
1425
1426SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1427 const unsigned RegClassIDs[],
1428 const unsigned SubRegs[]) {
1429 // There's no special register-class for a vector-list of 1 element: it's just
1430 // a vector.
1431 if (Regs.size() == 1)
1432 return Regs[0];
1433
1434 assert(Regs.size() >= 2 && Regs.size() <= 4);
1435
  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
1441 Ops.push_back(
1442 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1443
1444 // Then we get pairs of source & subregister-position for the components.
1445 for (unsigned i = 0; i < Regs.size(); ++i) {
1446 Ops.push_back(Regs[i]);
1447 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1448 }
1449
1450 SDNode *N =
1451 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1452 return SDValue(N, 0);
1453}
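// Illustrative example (not part of the original source): for a pair of Q
// registers, createQTuple selects AArch64::QQRegClassID (Regs.size() - 2 == 0)
// and emits
//   REG_SEQUENCE QQRegClass, Vec0, qsub0, Vec1, qsub1
// so the two vectors are allocated as consecutive registers, as required by
// ld2/st2-style vector-list instructions.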
1454
1455void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1456 bool isExt) {
1457 SDLoc dl(N);
1458 EVT VT = N->getValueType(0);
1459
1460 unsigned ExtOff = isExt;
1461
1462 // Form a REG_SEQUENCE to force register allocation.
1463 unsigned Vec0Off = ExtOff + 1;
1464 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1465 N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
1470 Ops.push_back(N->getOperand(1));
1471 Ops.push_back(RegSeq);
1472 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1473 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1474}
1475
static std::tuple<SDValue, SDValue>
extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
  SDLoc DL(Disc);
1479 SDValue AddrDisc;
1480 SDValue ConstDisc;
1481
1482 // If this is a blend, remember the constant and address discriminators.
1483 // Otherwise, it's either a constant discriminator, or a non-blended
1484 // address discriminator.
1485 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1486 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1487 AddrDisc = Disc->getOperand(1);
1488 ConstDisc = Disc->getOperand(2);
1489 } else {
1490 ConstDisc = Disc;
1491 }
1492
1493 // If the constant discriminator (either the blend RHS, or the entire
1494 // discriminator value) isn't a 16-bit constant, bail out, and let the
1495 // discriminator be computed separately.
1496 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1497 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1498 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1499
1500 // If there's no address discriminator, use XZR directly.
1501 if (!AddrDisc)
1502 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1503
1504 return std::make_tuple(
1505 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1506 AddrDisc);
1507}
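// Illustrative examples (not part of the original source): a discriminator of
// the form @llvm.ptrauth.blend(%addr, 1234) splits into the pair (1234, %addr);
// a plain constant 42 becomes (42, XZR); and a blend whose constant part does
// not fit in 16 bits is returned as (0, <original discriminator>) so that the
// discriminator is computed separately.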
1508
1509void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1510 SDLoc DL(N);
1511 // IntrinsicID is operand #0
1512 SDValue Val = N->getOperand(1);
1513 SDValue AUTKey = N->getOperand(2);
1514 SDValue AUTDisc = N->getOperand(3);
1515
1516 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1517 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1518
1519 SDValue AUTAddrDisc, AUTConstDisc;
1520 std::tie(AUTConstDisc, AUTAddrDisc) =
1521 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1522
1523 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1524 AArch64::X16, Val, SDValue());
1525 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1526
1527 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1528 ReplaceNode(N, AUT);
1529 return;
1530}
1531
1532void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1533 SDLoc DL(N);
1534 // IntrinsicID is operand #0
1535 SDValue Val = N->getOperand(1);
1536 SDValue AUTKey = N->getOperand(2);
1537 SDValue AUTDisc = N->getOperand(3);
1538 SDValue PACKey = N->getOperand(4);
1539 SDValue PACDisc = N->getOperand(5);
1540
1541 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1542 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1543
1544 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1545 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1546
1547 SDValue AUTAddrDisc, AUTConstDisc;
1548 std::tie(AUTConstDisc, AUTAddrDisc) =
1549 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1550
1551 SDValue PACAddrDisc, PACConstDisc;
1552 std::tie(PACConstDisc, PACAddrDisc) =
1553 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1554
1555 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1556 AArch64::X16, Val, SDValue());
1557
1558 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1559 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1560
1561 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1562 ReplaceNode(N, AUTPAC);
1563 return;
1564}
1565
1566bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1567 LoadSDNode *LD = cast<LoadSDNode>(N);
1568 if (LD->isUnindexed())
1569 return false;
1570 EVT VT = LD->getMemoryVT();
1571 EVT DstVT = N->getValueType(0);
1572 ISD::MemIndexedMode AM = LD->getAddressingMode();
1573 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1574
1575 // We're not doing validity checking here. That was done when checking
1576 // if we should mark the load as indexed or not. We're just selecting
1577 // the right instruction.
1578 unsigned Opcode = 0;
1579
1580 ISD::LoadExtType ExtType = LD->getExtensionType();
1581 bool InsertTo64 = false;
1582 if (VT == MVT::i64)
1583 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1584 else if (VT == MVT::i32) {
1585 if (ExtType == ISD::NON_EXTLOAD)
1586 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1587 else if (ExtType == ISD::SEXTLOAD)
1588 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1589 else {
1590 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1591 InsertTo64 = true;
1592 // The result of the load is only i32. It's the subreg_to_reg that makes
1593 // it into an i64.
1594 DstVT = MVT::i32;
1595 }
1596 } else if (VT == MVT::i16) {
1597 if (ExtType == ISD::SEXTLOAD) {
1598 if (DstVT == MVT::i64)
1599 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1600 else
1601 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1602 } else {
1603 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1604 InsertTo64 = DstVT == MVT::i64;
1605 // The result of the load is only i32. It's the subreg_to_reg that makes
1606 // it into an i64.
1607 DstVT = MVT::i32;
1608 }
1609 } else if (VT == MVT::i8) {
1610 if (ExtType == ISD::SEXTLOAD) {
1611 if (DstVT == MVT::i64)
1612 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1613 else
1614 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1615 } else {
1616 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1617 InsertTo64 = DstVT == MVT::i64;
1618 // The result of the load is only i32. It's the subreg_to_reg that makes
1619 // it into an i64.
1620 DstVT = MVT::i32;
1621 }
1622 } else if (VT == MVT::f16) {
1623 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1624 } else if (VT == MVT::bf16) {
1625 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1626 } else if (VT == MVT::f32) {
1627 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1628 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1629 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1630 } else if (VT.is128BitVector()) {
1631 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1632 } else
1633 return false;
1634 SDValue Chain = LD->getChain();
1635 SDValue Base = LD->getBasePtr();
1636 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1637 int OffsetVal = (int)OffsetOp->getZExtValue();
1638 SDLoc dl(N);
1639 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1640 SDValue Ops[] = { Base, Offset, Chain };
1641 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1642 MVT::Other, Ops);
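// Note: Res's results are (writeback base : i64, loaded value : DstVT, chain),
// whereas an indexed LoadSDNode produces (value, writeback, chain); the
// ReplaceUses calls below account for the swapped first two results.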
1643
1644 // Transfer memoperands.
1645 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1646 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1647
1648 // Either way, we're replacing the node, so tell the caller that.
1649 SDValue LoadedVal = SDValue(Res, 1);
1650 if (InsertTo64) {
1651 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1652 LoadedVal =
1653 SDValue(CurDAG->getMachineNode(
1654 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1655 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1656 SubReg),
1657 0);
1658 }
1659
1660 ReplaceUses(SDValue(N, 0), LoadedVal);
1661 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1662 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1663 CurDAG->RemoveDeadNode(N);
1664 return true;
1665}
1666
1667void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1668 unsigned SubRegIdx) {
1669 SDLoc dl(N);
1670 EVT VT = N->getValueType(0);
1671 SDValue Chain = N->getOperand(0);
1672
1673 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1674 Chain};
1675
1676 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1677
1678 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1679 SDValue SuperReg = SDValue(Ld, 0);
1680 for (unsigned i = 0; i < NumVecs; ++i)
1681 ReplaceUses(SDValue(N, i),
1682 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1683
1684 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1685
1686 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1687 // because it's too simple to have needed special treatment during lowering.
1688 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1689 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1690 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1691 }
1692
1693 CurDAG->RemoveDeadNode(N);
1694}
1695
1696void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1697 unsigned Opc, unsigned SubRegIdx) {
1698 SDLoc dl(N);
1699 EVT VT = N->getValueType(0);
1700 SDValue Chain = N->getOperand(0);
1701
1702 SDValue Ops[] = {N->getOperand(1), // Mem operand
1703 N->getOperand(2), // Incremental
1704 Chain};
1705
1706 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1707 MVT::Untyped, MVT::Other};
1708
1709 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1710
1711 // Update uses of write back register
1712 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1713
1714 // Update uses of vector list
1715 SDValue SuperReg = SDValue(Ld, 1);
1716 if (NumVecs == 1)
1717 ReplaceUses(SDValue(N, 0), SuperReg);
1718 else
1719 for (unsigned i = 0; i < NumVecs; ++i)
1720 ReplaceUses(SDValue(N, i),
1721 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1722
1723 // Update the chain
1724 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1725 CurDAG->RemoveDeadNode(N);
1726}
1727
1728/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1729/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1730/// new Base and an SDValue representing the new offset.
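/// A reg+imm form is preferred when the offset fits the signed range [-8, 7]
/// (in units of the vector length, i.e. "MUL VL") accepted by the _ri
/// instructions; otherwise a reg+reg form is tried, and if neither matches the
/// original base and offset are kept with the _ri opcode.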
1731std::tuple<unsigned, SDValue, SDValue>
1732AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1733 unsigned Opc_ri,
1734 const SDValue &OldBase,
1735 const SDValue &OldOffset,
1736 unsigned Scale) {
1737 SDValue NewBase = OldBase;
1738 SDValue NewOffset = OldOffset;
1739 // Detect a possible Reg+Imm addressing mode.
1740 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1741 N, OldBase, NewBase, NewOffset);
1742
1743 // Detect a possible reg+reg addressing mode, but only if we haven't already
1744 // detected a Reg+Imm one.
1745 const bool IsRegReg =
1746 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1747
1748 // Select the instruction.
1749 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1750}
1751
1752enum class SelectTypeKind {
1753 Int1 = 0,
1754 Int = 1,
1755 FP = 2,
1756 AnyType = 3,
1757};
1758
1759 /// This function selects an opcode from a list of opcodes, which are
1760 /// expected to be the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
1761 /// element types, in that order.
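/// For example, nxv16i8 maps to Opcodes[0], nxv8i16 to Opcodes[1], nxv4i32
/// (or nxv4f32) to Opcodes[2], and nxv2i64 to Opcodes[3]; scalable bf16
/// vectors are keyed like the 8-bit case and therefore also select Opcodes[0].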
1762template <SelectTypeKind Kind>
1763static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1764 // Only match scalable vector VTs
1765 if (!VT.isScalableVector())
1766 return 0;
1767
1768 EVT EltVT = VT.getVectorElementType();
1769 unsigned Key = VT.getVectorMinNumElements();
1770 switch (Kind) {
1771 case SelectTypeKind::AnyType:
1772 break;
1773 case SelectTypeKind::Int:
1774 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1775 EltVT != MVT::i64)
1776 return 0;
1777 break;
1778 case SelectTypeKind::Int1:
1779 if (EltVT != MVT::i1)
1780 return 0;
1781 break;
1782 case SelectTypeKind::FP:
1783 if (EltVT == MVT::bf16)
1784 Key = 16;
1785 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1786 EltVT != MVT::f64)
1787 return 0;
1788 break;
1789 }
1790
1791 unsigned Offset;
1792 switch (Key) {
1793 case 16: // 8-bit or bf16
1794 Offset = 0;
1795 break;
1796 case 8: // 16-bit
1797 Offset = 1;
1798 break;
1799 case 4: // 32-bit
1800 Offset = 2;
1801 break;
1802 case 2: // 64-bit
1803 Offset = 3;
1804 break;
1805 default:
1806 return 0;
1807 }
1808
1809 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1810}
1811
1812// This function is almost identical to SelectWhilePair, but has an
1813// extra check on the range of the immediate operand.
1814// TODO: Merge these two functions together at some point?
1815void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1816 // Immediate can be either 0 or 1.
1817 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1818 if (Imm->getZExtValue() > 1)
1819 return;
1820
1821 SDLoc DL(N);
1822 EVT VT = N->getValueType(0);
1823 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1824 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1825 SDValue SuperReg = SDValue(WhilePair, 0);
1826
1827 for (unsigned I = 0; I < 2; ++I)
1828 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1829 AArch64::psub0 + I, DL, VT, SuperReg));
1830
1831 CurDAG->RemoveDeadNode(N);
1832}
1833
1834void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1835 SDLoc DL(N);
1836 EVT VT = N->getValueType(0);
1837
1838 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1839
1840 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1841 SDValue SuperReg = SDValue(WhilePair, 0);
1842
1843 for (unsigned I = 0; I < 2; ++I)
1844 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1845 AArch64::psub0 + I, DL, VT, SuperReg));
1846
1847 CurDAG->RemoveDeadNode(N);
1848}
1849
1850void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1851 unsigned Opcode) {
1852 EVT VT = N->getValueType(0);
1853 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1854 SDValue Ops = createZTuple(Regs);
1855 SDLoc DL(N);
1856 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1857 SDValue SuperReg = SDValue(Intrinsic, 0);
1858 for (unsigned i = 0; i < NumVecs; ++i)
1859 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1860 AArch64::zsub0 + i, DL, VT, SuperReg));
1861
1862 CurDAG->RemoveDeadNode(N);
1863}
1864
1865void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1866 unsigned NumVecs,
1867 bool IsZmMulti,
1868 unsigned Opcode,
1869 bool HasPred) {
1870 assert(Opcode != 0 && "Unexpected opcode");
1871
1872 SDLoc DL(N);
1873 EVT VT = N->getValueType(0);
1874 unsigned FirstVecIdx = HasPred ? 2 : 1;
1875
1876 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1877 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1878 return createZMulTuple(Regs);
1879 };
1880
1881 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1882
1883 SDValue Zm;
1884 if (IsZmMulti)
1885 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1886 else
1887 Zm = N->getOperand(NumVecs + FirstVecIdx);
1888
1889 SDNode *Intrinsic;
1890 if (HasPred)
1891 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1892 N->getOperand(1), Zdn, Zm);
1893 else
1894 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1895 SDValue SuperReg = SDValue(Intrinsic, 0);
1896 for (unsigned i = 0; i < NumVecs; ++i)
1897 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1898 AArch64::zsub0 + i, DL, VT, SuperReg));
1899
1900 CurDAG->RemoveDeadNode(N);
1901}
1902
1903void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1904 unsigned Scale, unsigned Opc_ri,
1905 unsigned Opc_rr, bool IsIntr) {
1906 assert(Scale < 5 && "Invalid scaling value.");
1907 SDLoc DL(N);
1908 EVT VT = N->getValueType(0);
1909 SDValue Chain = N->getOperand(0);
1910
1911 // Optimize addressing mode.
1912 SDValue Base, Offset;
1913 unsigned Opc;
1914 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1915 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1916 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1917
1918 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1919 Base, // Memory operand
1920 Offset, Chain};
1921
1922 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1923
1924 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1925 SDValue SuperReg = SDValue(Load, 0);
1926 for (unsigned i = 0; i < NumVecs; ++i)
1927 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1928 AArch64::zsub0 + i, DL, VT, SuperReg));
1929
1930 // Copy chain
1931 unsigned ChainIdx = NumVecs;
1932 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1933 CurDAG->RemoveDeadNode(N);
1934}
1935
1936void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1937 unsigned NumVecs,
1938 unsigned Scale,
1939 unsigned Opc_ri,
1940 unsigned Opc_rr) {
1941 assert(Scale < 4 && "Invalid scaling value.");
1942 SDLoc DL(N);
1943 EVT VT = N->getValueType(0);
1944 SDValue Chain = N->getOperand(0);
1945
1946 SDValue PNg = N->getOperand(2);
1947 SDValue Base = N->getOperand(3);
1948 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1949 unsigned Opc;
1950 std::tie(Opc, Base, Offset) =
1951 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1952
1953 SDValue Ops[] = {PNg, // Predicate-as-counter
1954 Base, // Memory operand
1955 Offset, Chain};
1956
1957 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1958
1959 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1960 SDValue SuperReg = SDValue(Load, 0);
1961 for (unsigned i = 0; i < NumVecs; ++i)
1962 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1963 AArch64::zsub0 + i, DL, VT, SuperReg));
1964
1965 // Copy chain
1966 unsigned ChainIdx = NumVecs;
1967 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1968 CurDAG->RemoveDeadNode(N);
1969}
1970
1971void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1972 unsigned Opcode) {
1973 if (N->getValueType(0) != MVT::nxv4f32)
1974 return;
1975 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1976}
1977
1978void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1979 unsigned NumOutVecs,
1980 unsigned Opc, uint32_t MaxImm) {
1981 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1982 if (Imm->getZExtValue() > MaxImm)
1983 return;
1984
1985 SDValue ZtValue;
1986 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1987 return;
1988 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1989 SDLoc DL(Node);
1990 EVT VT = Node->getValueType(0);
1991
1993 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1994 SDValue SuperReg = SDValue(Instruction, 0);
1995
1996 for (unsigned I = 0; I < NumOutVecs; ++I)
1997 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1998 AArch64::zsub0 + I, DL, VT, SuperReg));
1999
2000 // Copy chain
2001 unsigned ChainIdx = NumOutVecs;
2002 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2003 CurDAG->RemoveDeadNode(Node);
2004}
2005
2006void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2007 unsigned Op) {
2008 SDLoc DL(N);
2009 EVT VT = N->getValueType(0);
2010
2011 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2012 SDValue Zd = createZMulTuple(Regs);
2013 SDValue Zn = N->getOperand(1 + NumVecs);
2014 SDValue Zm = N->getOperand(2 + NumVecs);
2015
2016 SDValue Ops[] = {Zd, Zn, Zm};
2017
2018 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2019 SDValue SuperReg = SDValue(Intrinsic, 0);
2020 for (unsigned i = 0; i < NumVecs; ++i)
2021 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2022 AArch64::zsub0 + i, DL, VT, SuperReg));
2023
2024 CurDAG->RemoveDeadNode(N);
2025}
2026
2027bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2028 switch (BaseReg) {
2029 default:
2030 return false;
2031 case AArch64::ZA:
2032 case AArch64::ZAB0:
2033 if (TileNum == 0)
2034 break;
2035 return false;
2036 case AArch64::ZAH0:
2037 if (TileNum <= 1)
2038 break;
2039 return false;
2040 case AArch64::ZAS0:
2041 if (TileNum <= 3)
2042 break;
2043 return false;
2044 case AArch64::ZAD0:
2045 if (TileNum <= 7)
2046 break;
2047 return false;
2048 }
2049
2050 BaseReg += TileNum;
2051 return true;
2052}
2053
2054template <unsigned MaxIdx, unsigned Scale>
2055void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2056 unsigned BaseReg, unsigned Op) {
2057 unsigned TileNum = 0;
2058 if (BaseReg != AArch64::ZA)
2059 TileNum = N->getConstantOperandVal(2);
2060
2061 if (!SelectSMETile(BaseReg, TileNum))
2062 return;
2063
2064 SDValue SliceBase, Base, Offset;
2065 if (BaseReg == AArch64::ZA)
2066 SliceBase = N->getOperand(2);
2067 else
2068 SliceBase = N->getOperand(3);
2069
2070 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2071 return;
2072
2073 SDLoc DL(N);
2074 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2075 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2076 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2077
2078 EVT VT = N->getValueType(0);
2079 for (unsigned I = 0; I < NumVecs; ++I)
2080 ReplaceUses(SDValue(N, I),
2081 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2082 SDValue(Mov, 0)));
2083 // Copy chain
2084 unsigned ChainIdx = NumVecs;
2085 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2086 CurDAG->RemoveDeadNode(N);
2087}
2088
2089void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2090 unsigned Op, unsigned MaxIdx,
2091 unsigned Scale, unsigned BaseReg) {
2092 // The slice index can appear in different operand positions:
2093 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2094 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2095 SDValue SliceBase = N->getOperand(2);
2096 if (BaseReg != AArch64::ZA)
2097 SliceBase = N->getOperand(3);
2098
2099 SDValue Base, Offset;
2100 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2101 return;
2102 // The correct ZA tile number is computed when the machine instruction is
2103 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2104 // register with a ZReg.
2105 SDLoc DL(N);
2106 SmallVector<SDValue, 4> Ops;
2107 if (BaseReg != AArch64::ZA)
2108 Ops.push_back(N->getOperand(2));
2109 Ops.push_back(Base);
2110 Ops.push_back(Offset);
2111 Ops.push_back(N->getOperand(0)); //Chain
2112 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2113
2114 EVT VT = N->getValueType(0);
2115 for (unsigned I = 0; I < NumVecs; ++I)
2116 ReplaceUses(SDValue(N, I),
2117 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2118 SDValue(Mov, 0)));
2119
2120 // Copy chain
2121 unsigned ChainIdx = NumVecs;
2122 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2123 CurDAG->RemoveDeadNode(N);
2124}
2125
2126void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2127 unsigned NumOutVecs,
2128 bool IsTupleInput,
2129 unsigned Opc) {
2130 SDLoc DL(N);
2131 EVT VT = N->getValueType(0);
2132 unsigned NumInVecs = N->getNumOperands() - 1;
2133
2134 SmallVector<SDValue, 4> Ops;
2135 if (IsTupleInput) {
2136 assert((NumInVecs == 2 || NumInVecs == 4) &&
2137 "Don't know how to handle multi-register input!");
2138 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2139 Ops.push_back(createZMulTuple(Regs));
2140 } else {
2141 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2142 for (unsigned I = 0; I < NumInVecs; I++)
2143 Ops.push_back(N->getOperand(1 + I));
2144 }
2145
2146 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2147 SDValue SuperReg = SDValue(Res, 0);
2148
2149 for (unsigned I = 0; I < NumOutVecs; I++)
2150 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2151 AArch64::zsub0 + I, DL, VT, SuperReg));
2152 CurDAG->RemoveDeadNode(N);
2153}
2154
2155void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2156 unsigned Opc) {
2157 SDLoc dl(N);
2158 EVT VT = N->getOperand(2)->getValueType(0);
2159
2160 // Form a REG_SEQUENCE to force register allocation.
2161 bool Is128Bit = VT.getSizeInBits() == 128;
2162 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2163 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2164
2165 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2166 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2167
2168 // Transfer memoperands.
2169 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2170 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2171
2172 ReplaceNode(N, St);
2173}
2174
2175void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2176 unsigned Scale, unsigned Opc_rr,
2177 unsigned Opc_ri) {
2178 SDLoc dl(N);
2179
2180 // Form a REG_SEQUENCE to force register allocation.
2181 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2182 SDValue RegSeq = createZTuple(Regs);
2183
2184 // Optimize addressing mode.
2185 unsigned Opc;
2186 SDValue Base, Offset;
2187 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2188 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2189 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2190
2191 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2192 Base, // address
2193 Offset, // offset
2194 N->getOperand(0)}; // chain
2195 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2196
2197 ReplaceNode(N, St);
2198}
2199
2200bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2201 SDValue &OffImm) {
2202 SDLoc dl(N);
2203 const DataLayout &DL = CurDAG->getDataLayout();
2204 const TargetLowering *TLI = getTargetLowering();
2205
2206 // Try to match it for the frame address
2207 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2208 int FI = FINode->getIndex();
2209 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2210 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2211 return true;
2212 }
2213
2214 return false;
2215}
2216
2217void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2218 unsigned Opc) {
2219 SDLoc dl(N);
2220 EVT VT = N->getOperand(2)->getValueType(0);
2221 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2222 MVT::Other}; // Type for the Chain
2223
2224 // Form a REG_SEQUENCE to force register allocation.
2225 bool Is128Bit = VT.getSizeInBits() == 128;
2226 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2227 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2228
2229 SDValue Ops[] = {RegSeq,
2230 N->getOperand(NumVecs + 1), // base register
2231 N->getOperand(NumVecs + 2), // Incremental
2232 N->getOperand(0)}; // Chain
2233 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2234
2235 ReplaceNode(N, St);
2236}
2237
2238namespace {
2239/// WidenVector - Given a value in the V64 register class, produce the
2240/// equivalent value in the V128 register class.
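/// For example, a v2i32 value becomes the low 64 bits (dsub) of an otherwise
/// undefined v4i32 value, by inserting it into an IMPLICIT_DEF of the wider
/// type.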
2241class WidenVector {
2242 SelectionDAG &DAG;
2243
2244public:
2245 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2246
2247 SDValue operator()(SDValue V64Reg) {
2248 EVT VT = V64Reg.getValueType();
2249 unsigned NarrowSize = VT.getVectorNumElements();
2250 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2251 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2252 SDLoc DL(V64Reg);
2253
2254 SDValue Undef =
2255 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2256 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2257 }
2258};
2259} // namespace
2260
2261/// NarrowVector - Given a value in the V128 register class, produce the
2262/// equivalent value in the V64 register class.
2263 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2264 EVT VT = V128Reg.getValueType();
2265 unsigned WideSize = VT.getVectorNumElements();
2266 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2267 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2268
2269 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2270 V128Reg);
2271}
2272
2273void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2274 unsigned Opc) {
2275 SDLoc dl(N);
2276 EVT VT = N->getValueType(0);
2277 bool Narrow = VT.getSizeInBits() == 64;
2278
2279 // Form a REG_SEQUENCE to force register allocation.
2280 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2281
2282 if (Narrow)
2283 transform(Regs, Regs.begin(),
2284 WidenVector(*CurDAG));
2285
2286 SDValue RegSeq = createQTuple(Regs);
2287
2288 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2289
2290 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2291
2292 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2293 N->getOperand(NumVecs + 3), N->getOperand(0)};
2294 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2295 SDValue SuperReg = SDValue(Ld, 0);
2296
2297 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2298 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2299 AArch64::qsub2, AArch64::qsub3 };
2300 for (unsigned i = 0; i < NumVecs; ++i) {
2301 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2302 if (Narrow)
2303 NV = NarrowVector(NV, *CurDAG);
2304 ReplaceUses(SDValue(N, i), NV);
2305 }
2306
2307 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2308 CurDAG->RemoveDeadNode(N);
2309}
2310
2311void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2312 unsigned Opc) {
2313 SDLoc dl(N);
2314 EVT VT = N->getValueType(0);
2315 bool Narrow = VT.getSizeInBits() == 64;
2316
2317 // Form a REG_SEQUENCE to force register allocation.
2318 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2319
2320 if (Narrow)
2321 transform(Regs, Regs.begin(),
2322 WidenVector(*CurDAG));
2323
2324 SDValue RegSeq = createQTuple(Regs);
2325
2326 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2327 RegSeq->getValueType(0), MVT::Other};
2328
2329 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2330
2331 SDValue Ops[] = {RegSeq,
2332 CurDAG->getTargetConstant(LaneNo, dl,
2333 MVT::i64), // Lane Number
2334 N->getOperand(NumVecs + 2), // Base register
2335 N->getOperand(NumVecs + 3), // Incremental
2336 N->getOperand(0)};
2337 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2338
2339 // Update uses of the write back register
2340 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2341
2342 // Update uses of the vector list
2343 SDValue SuperReg = SDValue(Ld, 1);
2344 if (NumVecs == 1) {
2345 ReplaceUses(SDValue(N, 0),
2346 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2347 } else {
2348 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2349 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2350 AArch64::qsub2, AArch64::qsub3 };
2351 for (unsigned i = 0; i < NumVecs; ++i) {
2352 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2353 SuperReg);
2354 if (Narrow)
2355 NV = NarrowVector(NV, *CurDAG);
2356 ReplaceUses(SDValue(N, i), NV);
2357 }
2358 }
2359
2360 // Update the Chain
2361 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2362 CurDAG->RemoveDeadNode(N);
2363}
2364
2365void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2366 unsigned Opc) {
2367 SDLoc dl(N);
2368 EVT VT = N->getOperand(2)->getValueType(0);
2369 bool Narrow = VT.getSizeInBits() == 64;
2370
2371 // Form a REG_SEQUENCE to force register allocation.
2372 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2373
2374 if (Narrow)
2375 transform(Regs, Regs.begin(),
2376 WidenVector(*CurDAG));
2377
2378 SDValue RegSeq = createQTuple(Regs);
2379
2380 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2381
2382 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2383 N->getOperand(NumVecs + 3), N->getOperand(0)};
2384 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2385
2386 // Transfer memoperands.
2387 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2388 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2389
2390 ReplaceNode(N, St);
2391}
2392
2393void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2394 unsigned Opc) {
2395 SDLoc dl(N);
2396 EVT VT = N->getOperand(2)->getValueType(0);
2397 bool Narrow = VT.getSizeInBits() == 64;
2398
2399 // Form a REG_SEQUENCE to force register allocation.
2400 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2401
2402 if (Narrow)
2403 transform(Regs, Regs.begin(),
2404 WidenVector(*CurDAG));
2405
2406 SDValue RegSeq = createQTuple(Regs);
2407
2408 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2409 MVT::Other};
2410
2411 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2412
2413 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2414 N->getOperand(NumVecs + 2), // Base Register
2415 N->getOperand(NumVecs + 3), // Incremental
2416 N->getOperand(0)};
2417 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2418
2419 // Transfer memoperands.
2420 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2421 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2422
2423 ReplaceNode(N, St);
2424}
2425
2426 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2427 unsigned &Opc, SDValue &Opd0,
2428 unsigned &LSB, unsigned &MSB,
2429 unsigned NumberOfIgnoredLowBits,
2430 bool BiggerPattern) {
2431 assert(N->getOpcode() == ISD::AND &&
2432 "N must be a AND operation to call this function");
2433
2434 EVT VT = N->getValueType(0);
2435
2436 // We could test the type of VT here and return false when it does not
2437 // match, but since that check is done by the callers before reaching this
2438 // point, it is turned into an assert to avoid redundant code.
2439 assert((VT == MVT::i32 || VT == MVT::i64) &&
2440 "Type checking must have been done before calling this function");
2441
2442 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2443 // changed the AND node to a 32-bit mask operation. We'll have to
2444 // undo that as part of the transform here if we want to catch all
2445 // the opportunities.
2446 // Currently the NumberOfIgnoredLowBits argument helps to recover
2447 // from these situations when matching bigger pattern (bitfield insert).
2448
2449 // For unsigned extracts, check for a shift right and mask
2450 uint64_t AndImm = 0;
2451 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2452 return false;
2453
2454 const SDNode *Op0 = N->getOperand(0).getNode();
2455
2456 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2457 // simplified. Try to undo that
2458 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2459
2460 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
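// (e.g. 0x0f & 0x10 == 0, while a non-contiguous value such as 0x0a gives
// 0x0a & 0x0b == 0x0a != 0)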
2461 if (AndImm & (AndImm + 1))
2462 return false;
2463
2464 bool ClampMSB = false;
2465 uint64_t SrlImm = 0;
2466 // Handle the SRL + ANY_EXTEND case.
2467 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2468 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2469 // Extend the incoming operand of the SRL to 64-bit.
2470 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2471 // Make sure to clamp the MSB so that we preserve the semantics of the
2472 // original operations.
2473 ClampMSB = true;
2474 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2475 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2476 SrlImm)) {
2477 // If the shift result was truncated, we can still combine them.
2478 Opd0 = Op0->getOperand(0).getOperand(0);
2479
2480 // Use the type of SRL node.
2481 VT = Opd0->getValueType(0);
2482 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2483 Opd0 = Op0->getOperand(0);
2484 ClampMSB = (VT == MVT::i32);
2485 } else if (BiggerPattern) {
2486 // Let's pretend a 0 shift right has been performed.
2487 // The resulting code will be at least as good as the original one
2488 // plus it may expose more opportunities for bitfield insert pattern.
2489 // FIXME: Currently we limit this to the bigger pattern, because
2490 // some optimizations expect AND and not UBFM.
2491 Opd0 = N->getOperand(0);
2492 } else
2493 return false;
2494
2495 // Bail out on large immediates. This happens when no proper
2496 // combining/constant folding was performed.
2497 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2498 LLVM_DEBUG(
2499 (dbgs() << N
2500 << ": Found large shift immediate, this should not happen\n"));
2501 return false;
2502 }
2503
2504 LSB = SrlImm;
2505 MSB = SrlImm +
2506 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2507 : llvm::countr_one<uint64_t>(AndImm)) -
2508 1;
2509 if (ClampMSB)
2510 // Since we're moving the extend before the right shift operation, we need
2511 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2512 // the zeros which would get shifted in with the original right shift
2513 // operation.
2514 MSB = MSB > 31 ? 31 : MSB;
2515
2516 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2517 return true;
2518}
2519
2520static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2521 SDValue &Opd0, unsigned &Immr,
2522 unsigned &Imms) {
2523 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2524
2525 EVT VT = N->getValueType(0);
2526 unsigned BitWidth = VT.getSizeInBits();
2527 assert((VT == MVT::i32 || VT == MVT::i64) &&
2528 "Type checking must have been done before calling this function");
2529
2530 SDValue Op = N->getOperand(0);
2531 if (Op->getOpcode() == ISD::TRUNCATE) {
2532 Op = Op->getOperand(0);
2533 VT = Op->getValueType(0);
2534 BitWidth = VT.getSizeInBits();
2535 }
2536
2537 uint64_t ShiftImm;
2538 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2539 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2540 return false;
2541
2542 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2543 if (ShiftImm + Width > BitWidth)
2544 return false;
2545
2546 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2547 Opd0 = Op.getOperand(0);
2548 Immr = ShiftImm;
2549 Imms = ShiftImm + Width - 1;
2550 return true;
2551}
2552
2553static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2554 SDValue &Opd0, unsigned &LSB,
2555 unsigned &MSB) {
2556 // We are looking for the following pattern, which basically extracts several
2557 // contiguous bits from the source value and places them at the LSB of the
2558 // destination value; all other bits of the destination value are set to zero:
2559 //
2560 // Value2 = AND Value, MaskImm
2561 // SRL Value2, ShiftImm
2562 //
2563 // with MaskImm >> ShiftImm to search for the bit width.
2564 //
2565 // This gets selected into a single UBFM:
2566 //
2567 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2568 //
2569
2570 if (N->getOpcode() != ISD::SRL)
2571 return false;
2572
2573 uint64_t AndMask = 0;
2574 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2575 return false;
2576
2577 Opd0 = N->getOperand(0).getOperand(0);
2578
2579 uint64_t SrlImm = 0;
2580 if (!isIntImmediate(N->getOperand(1), SrlImm))
2581 return false;
2582
2583 // Check whether we really have several bits extract here.
2584 if (!isMask_64(AndMask >> SrlImm))
2585 return false;
2586
2587 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2588 LSB = SrlImm;
2589 MSB = llvm::Log2_64(AndMask);
2590 return true;
2591}
2592
2593static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2594 unsigned &Immr, unsigned &Imms,
2595 bool BiggerPattern) {
2596 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2597 "N must be a SHR/SRA operation to call this function");
2598
2599 EVT VT = N->getValueType(0);
2600
2601 // We could test the type of VT here and return false when it does not
2602 // match, but since that check is done by the callers before reaching this
2603 // point, it is turned into an assert to avoid redundant code.
2604 assert((VT == MVT::i32 || VT == MVT::i64) &&
2605 "Type checking must have been done before calling this function");
2606
2607 // Check for AND + SRL doing several bits extract.
2608 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2609 return true;
2610
2611 // We're looking for a shift of a shift.
2612 uint64_t ShlImm = 0;
2613 uint64_t TruncBits = 0;
2614 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2615 Opd0 = N->getOperand(0).getOperand(0);
2616 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2617 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2618 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2619 // be considered as setting high 32 bits as zero. Our strategy here is to
2620 // always generate 64bit UBFM. This consistency will help the CSE pass
2621 // later find more redundancy.
2622 Opd0 = N->getOperand(0).getOperand(0);
2623 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2624 VT = Opd0.getValueType();
2625 assert(VT == MVT::i64 && "the promoted type should be i64");
2626 } else if (BiggerPattern) {
2627 // Let's pretend a 0 shift left has been performed.
2628 // FIXME: Currently we limit this to the bigger pattern case,
2629 // because some optimizations expect AND and not UBFM
2630 Opd0 = N->getOperand(0);
2631 } else
2632 return false;
2633
2634 // Missing combines/constant folding may have left us with strange
2635 // constants.
2636 if (ShlImm >= VT.getSizeInBits()) {
2637 LLVM_DEBUG(
2638 (dbgs() << N
2639 << ": Found large shift immediate, this should not happen\n"));
2640 return false;
2641 }
2642
2643 uint64_t SrlImm = 0;
2644 if (!isIntImmediate(N->getOperand(1), SrlImm))
2645 return false;
2646
2647 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2648 "bad amount in shift node!");
2649 int immr = SrlImm - ShlImm;
2650 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2651 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2652 // SRA requires a signed extraction
2653 if (VT == MVT::i32)
2654 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2655 else
2656 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2657 return true;
2658}
2659
2660bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2661 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2662
2663 EVT VT = N->getValueType(0);
2664 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2665 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2666 return false;
2667
2668 uint64_t ShiftImm;
2669 SDValue Op = N->getOperand(0);
2670 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2671 return false;
2672
2673 SDLoc dl(N);
2674 // Extend the incoming operand of the shift to 64-bits.
2675 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2676 unsigned Immr = ShiftImm;
2677 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2678 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2679 CurDAG->getTargetConstant(Imms, dl, VT)};
2680 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2681 return true;
2682}
2683
2684static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2685 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2686 unsigned NumberOfIgnoredLowBits = 0,
2687 bool BiggerPattern = false) {
2688 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2689 return false;
2690
2691 switch (N->getOpcode()) {
2692 default:
2693 if (!N->isMachineOpcode())
2694 return false;
2695 break;
2696 case ISD::AND:
2697 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2698 NumberOfIgnoredLowBits, BiggerPattern);
2699 case ISD::SRL:
2700 case ISD::SRA:
2701 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2702
2703 case ISD::SIGN_EXTEND_INREG:
2704 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2705 }
2706
2707 unsigned NOpc = N->getMachineOpcode();
2708 switch (NOpc) {
2709 default:
2710 return false;
2711 case AArch64::SBFMWri:
2712 case AArch64::UBFMWri:
2713 case AArch64::SBFMXri:
2714 case AArch64::UBFMXri:
2715 Opc = NOpc;
2716 Opd0 = N->getOperand(0);
2717 Immr = N->getConstantOperandVal(1);
2718 Imms = N->getConstantOperandVal(2);
2719 return true;
2720 }
2721 // Unreachable
2722 return false;
2723}
2724
2725bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2726 unsigned Opc, Immr, Imms;
2727 SDValue Opd0;
2728 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2729 return false;
2730
2731 EVT VT = N->getValueType(0);
2732 SDLoc dl(N);
2733
2734 // If the bit extract operation is 64bit but the original type is 32bit, we
2735 // need to add one EXTRACT_SUBREG.
2736 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2737 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2738 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2739
2740 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2741 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2742 MVT::i32, SDValue(BFM, 0));
2743 ReplaceNode(N, Inner.getNode());
2744 return true;
2745 }
2746
2747 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2748 CurDAG->getTargetConstant(Imms, dl, VT)};
2749 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2750 return true;
2751}
2752
2753/// Does DstMask form a complementary pair with the mask provided by
2754/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2755/// this asks whether DstMask zeroes precisely those bits that will be set by
2756/// the other half.
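/// For example, with a 32-bit type and no ignored high bits, DstMask ==
/// 0xffff00ff pairs with bits-to-be-inserted covering 0x0000ff00: the two are
/// disjoint and together cover all 32 bits.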
2757static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2758 unsigned NumberOfIgnoredHighBits, EVT VT) {
2759 assert((VT == MVT::i32 || VT == MVT::i64) &&
2760 "i32 or i64 mask type expected!");
2761 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2762
2763 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2764 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2765
2766 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2767 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2768}
2769
2770// Look for bits that will be useful for later uses.
2771 // A bit is considered useless as soon as it is dropped and never used
2772 // before it has been dropped.
2773// E.g., looking for useful bit of x
2774// 1. y = x & 0x7
2775// 2. z = y >> 2
2776// After #1, x useful bits are 0x7, then the useful bits of x, live through
2777// y.
2778// After #2, the useful bits of x are 0x4.
2779 // However, if x is used by an unpredictable instruction, then all its bits
2780// are useful.
2781// E.g.
2782// 1. y = x & 0x7
2783// 2. z = y >> 2
2784// 3. str x, [@x]
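// In that last example the store at #3 consumes the whole of x, so every bit
// of x stays useful even though #1 and #2 only read its low three bits.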
2785static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2786
2787 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2788 unsigned Depth) {
2789 uint64_t Imm =
2790 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2791 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2792 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2793 getUsefulBits(Op, UsefulBits, Depth + 1);
2794}
2795
2796 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2797 uint64_t Imm, uint64_t MSB,
2798 unsigned Depth) {
2799 // inherit the bitwidth value
2800 APInt OpUsefulBits(UsefulBits);
2801 OpUsefulBits = 1;
2802
2803 if (MSB >= Imm) {
2804 OpUsefulBits <<= MSB - Imm + 1;
2805 --OpUsefulBits;
2806 // The interesting part will be in the lower part of the result
2807 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2808 // The interesting part was starting at Imm in the argument
2809 OpUsefulBits <<= Imm;
2810 } else {
2811 OpUsefulBits <<= MSB + 1;
2812 --OpUsefulBits;
2813 // The interesting part will be shifted in the result
2814 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2815 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2816 // The interesting part was at zero in the argument
2817 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2818 }
2819
2820 UsefulBits &= OpUsefulBits;
2821}
2822
2823static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2824 unsigned Depth) {
2825 uint64_t Imm =
2826 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2827 uint64_t MSB =
2828 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2829
2830 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2831}
2832
2833 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2834 unsigned Depth) {
2835 uint64_t ShiftTypeAndValue =
2836 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2837 APInt Mask(UsefulBits);
2838 Mask.clearAllBits();
2839 Mask.flipAllBits();
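// Mask now has all bits set: assume every bit of the shifted operand is
// useful, then narrow that according to the shift kind below.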
2840
2841 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2842 // Shift Left
2843 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2844 Mask <<= ShiftAmt;
2845 getUsefulBits(Op, Mask, Depth + 1);
2846 Mask.lshrInPlace(ShiftAmt);
2847 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2848 // Shift Right
2849 // We do not handle AArch64_AM::ASR, because the sign will change the
2850 // number of useful bits
2851 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2852 Mask.lshrInPlace(ShiftAmt);
2853 getUsefulBits(Op, Mask, Depth + 1);
2854 Mask <<= ShiftAmt;
2855 } else
2856 return;
2857
2858 UsefulBits &= Mask;
2859}
2860
2861static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2862 unsigned Depth) {
2863 uint64_t Imm =
2864 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2865 uint64_t MSB =
2866 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2867
2868 APInt OpUsefulBits(UsefulBits);
2869 OpUsefulBits = 1;
2870
2871 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2872 ResultUsefulBits.flipAllBits();
2873 APInt Mask(UsefulBits.getBitWidth(), 0);
2874
2875 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2876
2877 if (MSB >= Imm) {
2878 // The instruction is a BFXIL.
2879 uint64_t Width = MSB - Imm + 1;
2880 uint64_t LSB = Imm;
2881
2882 OpUsefulBits <<= Width;
2883 --OpUsefulBits;
2884
2885 if (Op.getOperand(1) == Orig) {
2886 // Copy the low bits from the result to bits starting from LSB.
2887 Mask = ResultUsefulBits & OpUsefulBits;
2888 Mask <<= LSB;
2889 }
2890
2891 if (Op.getOperand(0) == Orig)
2892 // Bits starting from LSB in the input contribute to the result.
2893 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2894 } else {
2895 // The instruction is a BFI.
2896 uint64_t Width = MSB + 1;
2897 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2898
2899 OpUsefulBits <<= Width;
2900 --OpUsefulBits;
2901 OpUsefulBits <<= LSB;
2902
2903 if (Op.getOperand(1) == Orig) {
2904 // Copy the bits from the result to the zero bits.
2905 Mask = ResultUsefulBits & OpUsefulBits;
2906 Mask.lshrInPlace(LSB);
2907 }
2908
2909 if (Op.getOperand(0) == Orig)
2910 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2911 }
2912
2913 UsefulBits &= Mask;
2914}
2915
2916static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2917 SDValue Orig, unsigned Depth) {
2918
2919 // Users of this node should have already been instruction selected
2920 // FIXME: Can we turn that into an assert?
2921 if (!UserNode->isMachineOpcode())
2922 return;
2923
2924 switch (UserNode->getMachineOpcode()) {
2925 default:
2926 return;
2927 case AArch64::ANDSWri:
2928 case AArch64::ANDSXri:
2929 case AArch64::ANDWri:
2930 case AArch64::ANDXri:
2931 // We increment Depth only when we call the getUsefulBits
2932 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2933 Depth);
2934 case AArch64::UBFMWri:
2935 case AArch64::UBFMXri:
2936 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2937
2938 case AArch64::ORRWrs:
2939 case AArch64::ORRXrs:
2940 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2941 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2942 Depth);
2943 return;
2944 case AArch64::BFMWri:
2945 case AArch64::BFMXri:
2946 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2947
2948 case AArch64::STRBBui:
2949 case AArch64::STURBBi:
2950 if (UserNode->getOperand(0) != Orig)
2951 return;
2952 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2953 return;
2954
2955 case AArch64::STRHHui:
2956 case AArch64::STURHHi:
2957 if (UserNode->getOperand(0) != Orig)
2958 return;
2959 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2960 return;
2961 }
2962}
2963
2964static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2965 if (Depth >= SelectionDAG::MaxRecursionDepth)
2966 return;
2967 // Initialize UsefulBits
2968 if (!Depth) {
2969 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2970 // At the beginning, assume every produced bit is useful
2971 UsefulBits = APInt(Bitwidth, 0);
2972 UsefulBits.flipAllBits();
2973 }
2974 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2975
2976 for (SDNode *Node : Op.getNode()->uses()) {
2977 // A use cannot produce useful bits
2978 APInt UsefulBitsForUse = APInt(UsefulBits);
2979 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2980 UsersUsefulBits |= UsefulBitsForUse;
2981 }
2982 // UsefulBits contains the produced bits that are meaningful for the
2983 // current definition, thus a user cannot make a bit meaningful at
2984 // this point
2985 UsefulBits &= UsersUsefulBits;
2986}
2987
2988/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2989/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2990/// 0, return Op unchanged.
2991static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2992 if (ShlAmount == 0)
2993 return Op;
2994
2995 EVT VT = Op.getValueType();
2996 SDLoc dl(Op);
2997 unsigned BitWidth = VT.getSizeInBits();
2998 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2999
3000 SDNode *ShiftNode;
3001 if (ShlAmount > 0) {
3002 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
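// (e.g. LSL #8 on a 32-bit value is UBFM wD, wN, #24, #23)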
3003 ShiftNode = CurDAG->getMachineNode(
3004 UBFMOpc, dl, VT, Op,
3005 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3006 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3007 } else {
3008 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3009 assert(ShlAmount < 0 && "expected right shift");
3010 int ShrAmount = -ShlAmount;
3011 ShiftNode = CurDAG->getMachineNode(
3012 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3013 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3014 }
3015
3016 return SDValue(ShiftNode, 0);
3017}
3018
3019// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3020 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3021 bool BiggerPattern,
3022 const uint64_t NonZeroBits,
3023 SDValue &Src, int &DstLSB,
3024 int &Width);
3025
3026 // For bit-field-positioning pattern "(shl VAL, N)".
3027 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3028 bool BiggerPattern,
3029 const uint64_t NonZeroBits,
3030 SDValue &Src, int &DstLSB,
3031 int &Width);
3032
3033/// Does this tree qualify as an attempt to move a bitfield into position,
3034/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3035 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3036 bool BiggerPattern, SDValue &Src,
3037 int &DstLSB, int &Width) {
3038 EVT VT = Op.getValueType();
3039 unsigned BitWidth = VT.getSizeInBits();
3040 (void)BitWidth;
3041 assert(BitWidth == 32 || BitWidth == 64);
3042
3043 KnownBits Known = CurDAG->computeKnownBits(Op);
3044
3045 // Non-zero in the sense that they're not provably zero, which is the key
3046 // point if we want to use this value
3047 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3048 if (!isShiftedMask_64(NonZeroBits))
3049 return false;
3050
3051 switch (Op.getOpcode()) {
3052 default:
3053 break;
3054 case ISD::AND:
3055 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3056 NonZeroBits, Src, DstLSB, Width);
3057 case ISD::SHL:
3058 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3059 NonZeroBits, Src, DstLSB, Width);
3060 }
3061
3062 return false;
3063}
3064
3065 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3066 bool BiggerPattern,
3067 const uint64_t NonZeroBits,
3068 SDValue &Src, int &DstLSB,
3069 int &Width) {
3070 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3071
3072 EVT VT = Op.getValueType();
3073 assert((VT == MVT::i32 || VT == MVT::i64) &&
3074 "Caller guarantees VT is one of i32 or i64");
3075 (void)VT;
3076
3077 uint64_t AndImm;
3078 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3079 return false;
3080
3081 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3082 // 1) (AndImm & (1 << POS) == 0)
3083 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3084 //
3085 // 1) and 2) don't agree so something must be wrong (e.g., in
3086 // 'SelectionDAG::computeKnownBits')
3087 assert((~AndImm & NonZeroBits) == 0 &&
3088 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3089
3090 SDValue AndOp0 = Op.getOperand(0);
3091
3092 uint64_t ShlImm;
3093 SDValue ShlOp0;
3094 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3095 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3096 ShlOp0 = AndOp0.getOperand(0);
3097 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3098 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3099 ShlImm)) {
3100 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3101
3102 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3103 SDValue ShlVal = AndOp0.getOperand(0);
3104
3105 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3106 // expect VT to be MVT::i32.
3107 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3108
3109 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3110 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3111 } else
3112 return false;
3113
3114 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3115 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3116 // AndOp0+AND.
3117 if (!BiggerPattern && !AndOp0.hasOneUse())
3118 return false;
3119
3120 DstLSB = llvm::countr_zero(NonZeroBits);
3121 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3122
3123 // Bail out on large Width. This happens when no proper combining / constant
3124 // folding was performed.
3125 if (Width >= (int)VT.getSizeInBits()) {
3126 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3127 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3128 // "val".
3129 // If VT is i32, what Width >= 32 means:
3130 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3131 // demands at least 'Width' bits (after the dag-combiner). This, together
3132 // with the `any_extend` Op (undefined higher bits), indicates a missed
3133 // combination when lowering the 'and' IR instruction to a machine IR instruction.
3134 LLVM_DEBUG(
3135 dbgs()
3136 << "Found large Width in bit-field-positioning -- this indicates no "
3137 "proper combining / constant folding was performed\n");
3138 return false;
3139 }
3140
3141 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3142 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3143 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3144 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3145 // which case it is not profitable to insert an extra shift.
3146 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3147 return false;
3148
3149 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3150 return true;
3151}
3152
3153// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3154// UBFIZ.
3155 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3156 SDValue &Src, int &DstLSB,
3157 int &Width) {
3158 // Caller should have verified that N is a left shift with constant shift
3159 // amount; asserts that.
3160 assert(Op.getOpcode() == ISD::SHL &&
3161 "Op.getNode() should be a SHL node to call this function");
3162 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3163 "Op.getNode() should shift ShlImm to call this function");
3164
3165 uint64_t AndImm = 0;
3166 SDValue Op0 = Op.getOperand(0);
3167 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3168 return false;
3169
3170 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3171 if (isMask_64(ShiftedAndImm)) {
3172 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3173 // should end with Mask, and could be prefixed with random bits if those
3174 // bits are shifted out.
3175 //
3176 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3177 // the AND result corresponding to those bits are shifted out, so it's fine
3178 // to not extract them.
3179 Width = llvm::countr_one(ShiftedAndImm);
3180 DstLSB = ShlImm;
3181 Src = Op0.getOperand(0);
3182 return true;
3183 }
3184 return false;
3185}
3186
3187 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3188 bool BiggerPattern,
3189 const uint64_t NonZeroBits,
3190 SDValue &Src, int &DstLSB,
3191 int &Width) {
3192 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3193
3194 EVT VT = Op.getValueType();
3195 assert((VT == MVT::i32 || VT == MVT::i64) &&
3196 "Caller guarantees that type is i32 or i64");
3197 (void)VT;
3198
3199 uint64_t ShlImm;
3200 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3201 return false;
3202
3203 if (!BiggerPattern && !Op.hasOneUse())
3204 return false;
3205
3206 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3207 return true;
3208
3209 DstLSB = llvm::countr_zero(NonZeroBits);
3210 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3211
3212 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3213 return false;
3214
3215 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3216 return true;
3217}
3218
3219static bool isShiftedMask(uint64_t Mask, EVT VT) {
3220 assert(VT == MVT::i32 || VT == MVT::i64);
3221 if (VT == MVT::i32)
3222 return isShiftedMask_32(Mask);
3223 return isShiftedMask_64(Mask);
3224}
3225
3226// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3227// inserted only sets known zero bits.
3228 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3229   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3230
3231 EVT VT = N->getValueType(0);
3232 if (VT != MVT::i32 && VT != MVT::i64)
3233 return false;
3234
3235 unsigned BitWidth = VT.getSizeInBits();
3236
3237 uint64_t OrImm;
3238 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3239 return false;
3240
3241 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3242 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3243 // performance neutral.
3244   if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3245     return false;
3246
3247 uint64_t MaskImm;
3248 SDValue And = N->getOperand(0);
3249 // Must be a single use AND with an immediate operand.
3250 if (!And.hasOneUse() ||
3251 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3252 return false;
3253
3254 // Compute the Known Zero for the AND as this allows us to catch more general
3255 // cases than just looking for AND with imm.
3256 KnownBits Known = CurDAG->computeKnownBits(And);
3257
3258 // Non-zero in the sense that they're not provably zero, which is the key
3259 // point if we want to use this value.
3260 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3261
3262 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3263 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3264 return false;
3265
3266 // The bits being inserted must only set those bits that are known to be zero.
3267 if ((OrImm & NotKnownZero) != 0) {
3268 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3269 // currently handle this case.
3270 return false;
3271 }
3272
3273 // BFI/BFXIL dst, src, #lsb, #width.
3274 int LSB = llvm::countr_one(NotKnownZero);
3275 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3276
3277 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3278 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3279 unsigned ImmS = Width - 1;
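   // For illustration: if Known.Zero == 0x0ff0 (i32), then NotKnownZero is
   // 0xfffff00f, so LSB == 4 and Width == 8; ImmR == 28 and ImmS == 7 is the
   // BFM encoding of "bfi dst, src, #4, #8".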
3280
3281 // If we're creating a BFI instruction avoid cases where we need more
3282 // instructions to materialize the BFI constant as compared to the original
3283 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3284 // should be no worse in this case.
3285 bool IsBFI = LSB != 0;
3286 uint64_t BFIImm = OrImm >> LSB;
3287 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3288 // We have a BFI instruction and we know the constant can't be materialized
3289 // with a ORR-immediate with the zero register.
3290 unsigned OrChunks = 0, BFIChunks = 0;
3291 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3292 if (((OrImm >> Shift) & 0xFFFF) != 0)
3293 ++OrChunks;
3294 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3295 ++BFIChunks;
3296 }
3297 if (BFIChunks > OrChunks)
3298 return false;
3299 }
3300
3301 // Materialize the constant to be inserted.
3302 SDLoc DL(N);
3303 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3304 SDNode *MOVI = CurDAG->getMachineNode(
3305 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3306
3307 // Create the BFI/BFXIL instruction.
3308 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3309 CurDAG->getTargetConstant(ImmR, DL, VT),
3310 CurDAG->getTargetConstant(ImmS, DL, VT)};
3311 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3312 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3313 return true;
3314}
3315
3316 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3317                                            SDValue &ShiftedOperand,
3318 uint64_t &EncodedShiftImm) {
3319 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3320 if (!Dst.hasOneUse())
3321 return false;
3322
3323 EVT VT = Dst.getValueType();
3324 assert((VT == MVT::i32 || VT == MVT::i64) &&
3325 "Caller should guarantee that VT is one of i32 or i64");
3326 const unsigned SizeInBits = VT.getSizeInBits();
3327
3328 SDLoc DL(Dst.getNode());
3329 uint64_t AndImm, ShlImm;
3330 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3331 isShiftedMask_64(AndImm)) {
3332 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3333 SDValue DstOp0 = Dst.getOperand(0);
3334 if (!DstOp0.hasOneUse())
3335 return false;
3336
3337 // An example to illustrate the transformation
3338 // From:
3339 // lsr x8, x1, #1
3340 // and x8, x8, #0x3f80
3341 // bfxil x8, x1, #0, #7
3342 // To:
3343     //   and    x8, x1, #0x7f
3344     //   ubfx   x9, x1, #8, #7
3345     //   orr    x8, x8, x9, lsl #7
3346 //
3347 // The number of instructions remains the same, but ORR is faster than BFXIL
3348 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3349 // the dependency chain is improved after the transformation.
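     // In the example above, SrlImm == 1 and AndImm == 0x3f80, so
     // NumTrailingZeroInShiftedMask == 7 and MaskWidth == 7: the UBFM extracts
     // bits [8, 14] of the source (ubfx #8, #7) and the ORR applies "lsl #7".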
3350 uint64_t SrlImm;
3351 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3352 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3353 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3354 unsigned MaskWidth =
3355 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3356 unsigned UBFMOpc =
3357 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3358 SDNode *UBFMNode = CurDAG->getMachineNode(
3359 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3360 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3361 VT),
3362 CurDAG->getTargetConstant(
3363 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3364 ShiftedOperand = SDValue(UBFMNode, 0);
3365 EncodedShiftImm = AArch64_AM::getShifterImm(
3366 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3367 return true;
3368 }
3369 }
3370 return false;
3371 }
3372
3373 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3374 ShiftedOperand = Dst.getOperand(0);
3375 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3376 return true;
3377 }
3378
3379 uint64_t SrlImm;
3380 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3381 ShiftedOperand = Dst.getOperand(0);
3382 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3383 return true;
3384 }
3385 return false;
3386}
3387
3388// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3389// the operands and select it to AArch64::ORR with shifted registers if
3390// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3391static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3392 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3393 const bool BiggerPattern) {
3394 EVT VT = N->getValueType(0);
3395 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3396 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3397 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3398 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3399 assert((VT == MVT::i32 || VT == MVT::i64) &&
3400 "Expect result type to be i32 or i64 since N is combinable to BFM");
3401 SDLoc DL(N);
3402
3403 // Bail out if BFM simplifies away one node in BFM Dst.
3404 if (OrOpd1 != Dst)
3405 return false;
3406
3407 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3408 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3409 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3410 if (BiggerPattern) {
3411 uint64_t SrcAndImm;
3412 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3413 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3414 // OrOpd0 = AND Src, #Mask
3415 // So BFM simplifies away one AND node from Src and doesn't simplify away
3416 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3417 // one node (from Rd), ORR is better since it has higher throughput and
3418 // smaller latency than BFM on many AArch64 processors (and for the rest
3419 // ORR is at least as good as BFM).
3420 SDValue ShiftedOperand;
3421 uint64_t EncodedShiftImm;
3422 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3423 EncodedShiftImm)) {
3424 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3425 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3426 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3427 return true;
3428 }
3429 }
3430 return false;
3431 }
3432
3433 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3434
3435 uint64_t ShlImm;
3436 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3437 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3438 SDValue Ops[] = {
3439 Dst, Src,
3440 CurDAG->getTargetConstant(
3441               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3442       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3443 return true;
3444 }
3445
3446 // Select the following pattern to left-shifted operand rather than BFI.
3447 // %val1 = op ..
3448 // %val2 = shl %val1, #imm
3449 // %res = or %val1, %val2
3450 //
3451 // If N is selected to be BFI, we know that
3452     // 1) OrOpd0 would be the operand from which bits are extracted (folded
3453     //    into BFI), and 2) OrOpd1 would be the destination operand (preserved).
3454 //
3455 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3456 if (OrOpd0.getOperand(0) == OrOpd1) {
3457 SDValue Ops[] = {
3458 OrOpd1, OrOpd1,
3459 CurDAG->getTargetConstant(
3460               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3461       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3462 return true;
3463 }
3464 }
3465
3466 uint64_t SrlImm;
3467 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3468 // Select the following pattern to right-shifted operand rather than BFXIL.
3469 // %val1 = op ..
3470 // %val2 = lshr %val1, #imm
3471 // %res = or %val1, %val2
3472 //
3473 // If N is selected to be BFXIL, we know that
3474     // 1) OrOpd0 would be the operand from which bits are extracted (folded
3475     //    into BFXIL), and 2) OrOpd1 would be the destination operand (preserved).
3476 //
3477 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3478 if (OrOpd0.getOperand(0) == OrOpd1) {
3479 SDValue Ops[] = {
3480 OrOpd1, OrOpd1,
3481 CurDAG->getTargetConstant(
3482               AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3483       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3484 return true;
3485 }
3486 }
3487
3488 return false;
3489}
3490
3491static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3492 SelectionDAG *CurDAG) {
3493 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3494
3495 EVT VT = N->getValueType(0);
3496 if (VT != MVT::i32 && VT != MVT::i64)
3497 return false;
3498
3499 unsigned BitWidth = VT.getSizeInBits();
3500
3501 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3502 // have the expected shape. Try to undo that.
3503
3504 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3505 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3506
3507   // Given an OR operation, check if we have the following pattern
3508   // ubfm c, b, imm, imm2 (or something that does the same job, see
3509   //                       isBitfieldExtractOp)
3510   // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3511   //                 countTrailingZeros(mask2) == imm2 - imm + 1
3512 // f = d | c
3513 // if yes, replace the OR instruction with:
3514 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3515
3516 // OR is commutative, check all combinations of operand order and values of
3517 // BiggerPattern, i.e.
3518 // Opd0, Opd1, BiggerPattern=false
3519 // Opd1, Opd0, BiggerPattern=false
3520 // Opd0, Opd1, BiggerPattern=true
3521 // Opd1, Opd0, BiggerPattern=true
3522 // Several of these combinations may match, so check with BiggerPattern=false
3523 // first since that will produce better results by matching more instructions
3524 // and/or inserting fewer extra instructions.
3525 for (int I = 0; I < 4; ++I) {
3526
3527 SDValue Dst, Src;
3528 unsigned ImmR, ImmS;
3529 bool BiggerPattern = I / 2;
3530 SDValue OrOpd0Val = N->getOperand(I % 2);
3531 SDNode *OrOpd0 = OrOpd0Val.getNode();
3532 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3533 SDNode *OrOpd1 = OrOpd1Val.getNode();
3534
3535 unsigned BFXOpc;
3536 int DstLSB, Width;
3537 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3538 NumberOfIgnoredLowBits, BiggerPattern)) {
3539 // Check that the returned opcode is compatible with the pattern,
3540 // i.e., same type and zero extended (U and not S)
3541 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3542 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3543 continue;
3544
3545 // Compute the width of the bitfield insertion
3546 DstLSB = 0;
3547 Width = ImmS - ImmR + 1;
3548       // FIXME: This constraint is here to catch bitfield insertion; we may
3549       // want to widen the pattern if we want to handle the general bitfield
3550       // move case.
3551 if (Width <= 0)
3552 continue;
3553
3554 // If the mask on the insertee is correct, we have a BFXIL operation. We
3555 // can share the ImmR and ImmS values from the already-computed UBFM.
3556 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3557 BiggerPattern,
3558 Src, DstLSB, Width)) {
3559 ImmR = (BitWidth - DstLSB) % BitWidth;
3560 ImmS = Width - 1;
3561 } else
3562 continue;
3563
3564 // Check the second part of the pattern
3565 EVT VT = OrOpd1Val.getValueType();
3566 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3567
3568     // Compute the Known Zero for the candidate of the first operand.
3569     // This allows us to catch more general cases than just looking for
3570     // an AND with an immediate. Indeed, simplify-demanded-bits may have
3571     // removed the AND instruction because it proved it was useless.
3572 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3573
3574 // Check if there is enough room for the second operand to appear
3575 // in the first one
3576 APInt BitsToBeInserted =
3577 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3578
3579 if ((BitsToBeInserted & ~Known.Zero) != 0)
3580 continue;
3581
3582 // Set the first operand
3583 uint64_t Imm;
3584 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3585 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3586 // In that case, we can eliminate the AND
3587 Dst = OrOpd1->getOperand(0);
3588 else
3589 // Maybe the AND has been removed by simplify-demanded-bits
3590 // or is useful because it discards more bits
3591 Dst = OrOpd1Val;
3592
3593 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3594 // with shifted operand is more efficient.
3595 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3596 BiggerPattern))
3597 return true;
3598
3599 // both parts match
3600 SDLoc DL(N);
3601 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3602 CurDAG->getTargetConstant(ImmS, DL, VT)};
3603 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3604 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3605 return true;
3606 }
3607
3608 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3609 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3610 // mask (e.g., 0x000ffff0).
3611 uint64_t Mask0Imm, Mask1Imm;
3612 SDValue And0 = N->getOperand(0);
3613 SDValue And1 = N->getOperand(1);
3614 if (And0.hasOneUse() && And1.hasOneUse() &&
3615 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3616 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3617 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3618 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3619
3620 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3621 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3622 // bits to be inserted.
3623 if (isShiftedMask(Mask0Imm, VT)) {
3624 std::swap(And0, And1);
3625 std::swap(Mask0Imm, Mask1Imm);
3626 }
3627
3628 SDValue Src = And1->getOperand(0);
3629 SDValue Dst = And0->getOperand(0);
3630 unsigned LSB = llvm::countr_zero(Mask1Imm);
3631 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3632
3633 // The BFXIL inserts the low-order bits from a source register, so right
3634 // shift the needed bits into place.
3635 SDLoc DL(N);
3636 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3637 uint64_t LsrImm = LSB;
3638 if (Src->hasOneUse() &&
3639 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3640 (LsrImm + LSB) < BitWidth) {
3641 Src = Src->getOperand(0);
3642 LsrImm += LSB;
3643 }
3644
3645 SDNode *LSR = CurDAG->getMachineNode(
3646 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3647 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3648
3649 // BFXIL is an alias of BFM, so translate to BFM operands.
3650 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3651 unsigned ImmS = Width - 1;
3652
3653 // Create the BFXIL instruction.
3654 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3655 CurDAG->getTargetConstant(ImmR, DL, VT),
3656 CurDAG->getTargetConstant(ImmS, DL, VT)};
3657 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3658 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3659 return true;
3660 }
3661
3662 return false;
3663}
3664
3665bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3666 if (N->getOpcode() != ISD::OR)
3667 return false;
3668
3669 APInt NUsefulBits;
3670 getUsefulBits(SDValue(N, 0), NUsefulBits);
3671
3672   // If none of the bits are useful, just return UNDEF.
3673 if (!NUsefulBits) {
3674 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3675 return true;
3676 }
3677
3678 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3679 return true;
3680
3681 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3682}
3683
3684 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3685/// equivalent of a left shift by a constant amount followed by an and masking
3686/// out a contiguous set of bits.
3687bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3688 if (N->getOpcode() != ISD::AND)
3689 return false;
3690
3691 EVT VT = N->getValueType(0);
3692 if (VT != MVT::i32 && VT != MVT::i64)
3693 return false;
3694
3695 SDValue Op0;
3696 int DstLSB, Width;
3697 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3698 Op0, DstLSB, Width))
3699 return false;
3700
3701 // ImmR is the rotate right amount.
3702 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3703 // ImmS is the most significant bit of the source to be moved.
3704 unsigned ImmS = Width - 1;
3705
3706 SDLoc DL(N);
3707 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3708 CurDAG->getTargetConstant(ImmS, DL, VT)};
3709 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3710 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3711 return true;
3712}
3713
3714/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3715/// variable shift/rotate instructions.
3716bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3717 EVT VT = N->getValueType(0);
3718
3719 unsigned Opc;
3720 switch (N->getOpcode()) {
3721 case ISD::ROTR:
3722 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3723 break;
3724 case ISD::SHL:
3725 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3726 break;
3727 case ISD::SRL:
3728 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3729 break;
3730 case ISD::SRA:
3731 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3732 break;
3733 default:
3734 return false;
3735 }
3736
3737 uint64_t Size;
3738 uint64_t Bits;
3739 if (VT == MVT::i32) {
3740 Bits = 5;
3741 Size = 32;
3742 } else if (VT == MVT::i64) {
3743 Bits = 6;
3744 Size = 64;
3745 } else
3746 return false;
3747
3748 SDValue ShiftAmt = N->getOperand(1);
3749 SDLoc DL(N);
3750 SDValue NewShiftAmt;
3751
3752 // Skip over an extend of the shift amount.
3753 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3754 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3755 ShiftAmt = ShiftAmt->getOperand(0);
3756
3757 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3758 SDValue Add0 = ShiftAmt->getOperand(0);
3759 SDValue Add1 = ShiftAmt->getOperand(1);
3760 uint64_t Add0Imm;
3761 uint64_t Add1Imm;
3762 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3763 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3764 // to avoid the ADD/SUB.
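       // For example, a 64-bit (srl X, (add Y, 64)) is selected as "lsrv X, Y",
       // since LSRV only uses the low 6 bits of the shift amount anyway.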
3765 NewShiftAmt = Add0;
3766 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3767 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3768 (Add0Imm % Size == 0)) {
3769 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3770 // to generate a NEG instead of a SUB from a constant.
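       // For example, a 64-bit (shl X, (sub 64, Y)) selects to a NEG of Y
       // feeding LSLV, avoiding the need to materialize the constant 64.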
3771 unsigned NegOpc;
3772 unsigned ZeroReg;
3773 EVT SubVT = ShiftAmt->getValueType(0);
3774 if (SubVT == MVT::i32) {
3775 NegOpc = AArch64::SUBWrr;
3776 ZeroReg = AArch64::WZR;
3777 } else {
3778 assert(SubVT == MVT::i64);
3779 NegOpc = AArch64::SUBXrr;
3780 ZeroReg = AArch64::XZR;
3781 }
3782 SDValue Zero =
3783 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3784 MachineSDNode *Neg =
3785 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3786 NewShiftAmt = SDValue(Neg, 0);
3787 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3788 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3789 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3790 // to generate a NOT instead of a SUB from a constant.
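       // For example, a 64-bit (srl X, (sub 63, Y)) uses ORN (i.e. ~Y) as the
       // shift amount, since 63 - Y == ~Y modulo 64.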
3791 unsigned NotOpc;
3792 unsigned ZeroReg;
3793 EVT SubVT = ShiftAmt->getValueType(0);
3794 if (SubVT == MVT::i32) {
3795 NotOpc = AArch64::ORNWrr;
3796 ZeroReg = AArch64::WZR;
3797 } else {
3798 assert(SubVT == MVT::i64);
3799 NotOpc = AArch64::ORNXrr;
3800 ZeroReg = AArch64::XZR;
3801 }
3802 SDValue Zero =
3803 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3804       MachineSDNode *Not =
3805           CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3806 NewShiftAmt = SDValue(Not, 0);
3807 } else
3808 return false;
3809 } else {
3810 // If the shift amount is masked with an AND, check that the mask covers the
3811 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3812 // the AND.
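     // For example, for a 64-bit shift, (and Y, 63) covers the 6 bits the
     // instruction implicitly uses, so the AND can be skipped; (and Y, 31)
     // does not, so the fold is rejected.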
3813 uint64_t MaskImm;
3814 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3815 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3816 return false;
3817
3818 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3819 return false;
3820
3821 NewShiftAmt = ShiftAmt->getOperand(0);
3822 }
3823
3824 // Narrow/widen the shift amount to match the size of the shift operation.
3825 if (VT == MVT::i32)
3826 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3827 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3828 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3829 MachineSDNode *Ext = CurDAG->getMachineNode(
3830 AArch64::SUBREG_TO_REG, DL, VT,
3831 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3832 NewShiftAmt = SDValue(Ext, 0);
3833 }
3834
3835 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3836 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3837 return true;
3838}
3839
3840 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3841                                                SDValue &FixedPos,
3842 unsigned RegWidth,
3843 bool isReciprocal) {
3844 APFloat FVal(0.0);
3845 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3846 FVal = CN->getValueAPF();
3847 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3848 // Some otherwise illegal constants are allowed in this case.
3849 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3850 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3851 return false;
3852
3853 ConstantPoolSDNode *CN =
3854 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3855 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3856 } else
3857 return false;
3858
3859 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3860 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3861 // x-register.
3862 //
3863 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3864 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3865 // integers.
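   // For illustration: (fp_to_sint (fmul Val, 128.0)) yields IntVal == 128 ==
   // 2^7, so FBits == 7 and the conversion can be selected to the fixed-point
   // form "fcvtzs Wd, Sn, #7" (assuming RegWidth >= 7).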
3866 bool IsExact;
3867
3868 if (isReciprocal)
3869 if (!FVal.getExactInverse(&FVal))
3870 return false;
3871
3872 // fbits is between 1 and 64 in the worst-case, which means the fmul
3873 // could have 2^64 as an actual operand. Need 65 bits of precision.
3874 APSInt IntVal(65, true);
3875 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3876
3877 // N.b. isPowerOf2 also checks for > 0.
3878 if (!IsExact || !IntVal.isPowerOf2())
3879 return false;
3880 unsigned FBits = IntVal.logBase2();
3881
3882 // Checks above should have guaranteed that we haven't lost information in
3883 // finding FBits, but it must still be in range.
3884 if (FBits == 0 || FBits > RegWidth) return false;
3885
3886 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3887 return true;
3888}
3889
3890bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3891 unsigned RegWidth) {
3892 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3893 false);
3894}
3895
3896bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3897 SDValue &FixedPos,
3898 unsigned RegWidth) {
3899 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3900 true);
3901}
3902
3903 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3904 // fields of the string, obtains the integer values from them, and combines
3905 // these into a single value to be used in the MRS/MSR instruction.
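 // For example, the string "3:3:14:0:0" (CNTFRQ_EL0) combines to
 // (3 << 14) | (3 << 11) | (14 << 7) == 0xdf00.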
3906 static int getIntOperandFromRegisterString(StringRef RegString) {
3907   SmallVector<StringRef, 5> Fields;
3908   RegString.split(Fields, ':');
3909
3910 if (Fields.size() == 1)
3911 return -1;
3912
3913 assert(Fields.size() == 5
3914 && "Invalid number of fields in read register string");
3915
3916   SmallVector<int, 5> Ops;
3917   bool AllIntFields = true;
3918
3919 for (StringRef Field : Fields) {
3920 unsigned IntField;
3921 AllIntFields &= !Field.getAsInteger(10, IntField);
3922 Ops.push_back(IntField);
3923 }
3924
3925 assert(AllIntFields &&
3926 "Unexpected non-integer value in special register string.");
3927 (void)AllIntFields;
3928
3929 // Need to combine the integer fields of the string into a single value
3930 // based on the bit encoding of MRS/MSR instruction.
3931 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3932 (Ops[3] << 3) | (Ops[4]);
3933}
3934
3935 // Lower the read_register intrinsic to an MRS instruction node if the special
3936 // register string argument is either of the form detailed in the ACLE (the
3937 // form described in getIntOperandFromRegisterString) or is a named register
3938 // known by the MRS SysReg mapper.
3939bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3940 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3941 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3942 SDLoc DL(N);
3943
3944 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3945
3946 unsigned Opcode64Bit = AArch64::MRS;
3947 int Imm = getIntOperandFromRegisterString(RegString->getString());
3948 if (Imm == -1) {
3949     // No match. Use the sysreg mapper to map the remaining possible strings
3950     // to the value for the register to be used for the instruction operand.
3951 const auto *TheReg =
3952 AArch64SysReg::lookupSysRegByName(RegString->getString());
3953 if (TheReg && TheReg->Readable &&
3954 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3955 Imm = TheReg->Encoding;
3956 else
3957 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3958
3959 if (Imm == -1) {
3960 // Still no match, see if this is "pc" or give up.
3961 if (!ReadIs128Bit && RegString->getString() == "pc") {
3962 Opcode64Bit = AArch64::ADR;
3963 Imm = 0;
3964 } else {
3965 return false;
3966 }
3967 }
3968 }
3969
3970 SDValue InChain = N->getOperand(0);
3971 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3972 if (!ReadIs128Bit) {
3973 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3974 {SysRegImm, InChain});
3975 } else {
3976 SDNode *MRRS = CurDAG->getMachineNode(
3977 AArch64::MRRS, DL,
3978 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3979 {SysRegImm, InChain});
3980
3981     // Sysregs are not affected by endianness. The even subregister always
3982     // contains the low half of the register value.
3983 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3984 SDValue(MRRS, 0));
3985 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3986 SDValue(MRRS, 0));
3987 SDValue OutChain = SDValue(MRRS, 1);
3988
3989 ReplaceUses(SDValue(N, 0), Lo);
3990 ReplaceUses(SDValue(N, 1), Hi);
3991 ReplaceUses(SDValue(N, 2), OutChain);
3992 };
3993 return true;
3994}
3995
3996 // Lower the write_register intrinsic to an MSR instruction node if the special
3997 // register string argument is either of the form detailed in the ACLE (the
3998 // form described in getIntOperandFromRegisterString) or is a named register
3999 // known by the MSR SysReg mapper.
4000bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4001 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4002 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4003 SDLoc DL(N);
4004
4005 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4006
4007 if (!WriteIs128Bit) {
4008 // Check if the register was one of those allowed as the pstatefield value
4009 // in the MSR (immediate) instruction. To accept the values allowed in the
4010     // pstatefield for the MSR (immediate) instruction, we also require that an
4011     // immediate value has been provided as an argument; we know that this is
4012     // the case, as it has been ensured by semantic checking.
4013 auto trySelectPState = [&](auto PMapper, unsigned State) {
4014 if (PMapper) {
4015 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4016 "Expected a constant integer expression.");
4017 unsigned Reg = PMapper->Encoding;
4018 uint64_t Immed = N->getConstantOperandVal(2);
4019 CurDAG->SelectNodeTo(
4020 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4021 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4022 return true;
4023 }
4024 return false;
4025 };
4026
4027 if (trySelectPState(
4028 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4029 AArch64::MSRpstateImm4))
4030 return true;
4031 if (trySelectPState(
4032 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4033 AArch64::MSRpstateImm1))
4034 return true;
4035 }
4036
4037 int Imm = getIntOperandFromRegisterString(RegString->getString());
4038 if (Imm == -1) {
4039 // Use the sysreg mapper to attempt to map the remaining possible strings
4040 // to the value for the register to be used for the MSR (register)
4041 // instruction operand.
4042 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4043 if (TheReg && TheReg->Writeable &&
4044 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4045 Imm = TheReg->Encoding;
4046 else
4047 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4048
4049 if (Imm == -1)
4050 return false;
4051 }
4052
4053 SDValue InChain = N->getOperand(0);
4054 if (!WriteIs128Bit) {
4055 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4056 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4057 N->getOperand(2), InChain);
4058 } else {
4059     // No endian swap. The lower half always goes into the even subreg, and the
4060     // higher half always into the odd subreg.
4061 SDNode *Pair = CurDAG->getMachineNode(
4062 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4063 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4064 MVT::i32),
4065 N->getOperand(2),
4066 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4067 N->getOperand(3),
4068 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4069
4070 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4071 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4072 SDValue(Pair, 0), InChain);
4073 }
4074
4075 return true;
4076}
4077
4078/// We've got special pseudo-instructions for these
4079bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4080 unsigned Opcode;
4081 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4082
4083 // Leave IR for LSE if subtarget supports it.
4084 if (Subtarget->hasLSE()) return false;
4085
4086 if (MemTy == MVT::i8)
4087 Opcode = AArch64::CMP_SWAP_8;
4088 else if (MemTy == MVT::i16)
4089 Opcode = AArch64::CMP_SWAP_16;
4090 else if (MemTy == MVT::i32)
4091 Opcode = AArch64::CMP_SWAP_32;
4092 else if (MemTy == MVT::i64)
4093 Opcode = AArch64::CMP_SWAP_64;
4094 else
4095 llvm_unreachable("Unknown AtomicCmpSwap type");
4096
4097 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4098 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4099 N->getOperand(0)};
4100 SDNode *CmpSwap = CurDAG->getMachineNode(
4101 Opcode, SDLoc(N),
4102 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4103
4104 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4105 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4106
4107 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4108 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4109 CurDAG->RemoveDeadNode(N);
4110
4111 return true;
4112}
4113
4114bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4115 SDValue &Shift) {
4116 if (!isa<ConstantSDNode>(N))
4117 return false;
4118
4119 SDLoc DL(N);
4120 uint64_t Val = cast<ConstantSDNode>(N)
4121 ->getAPIntValue()
4122 .trunc(VT.getFixedSizeInBits())
4123 .getZExtValue();
4124
4125 switch (VT.SimpleTy) {
4126 case MVT::i8:
4127 // All immediates are supported.
4128 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4129 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4130 return true;
4131 case MVT::i16:
4132 case MVT::i32:
4133 case MVT::i64:
4134 // Support 8bit unsigned immediates.
4135 if (Val <= 255) {
4136 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4137 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4138 return true;
4139 }
4140 // Support 16bit unsigned immediates that are a multiple of 256.
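     // For example, 0x2a00 is encoded as immediate 0x2a with an LSL #8 shift.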
4141 if (Val <= 65280 && Val % 256 == 0) {
4142 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4143 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4144 return true;
4145 }
4146 break;
4147 default:
4148 break;
4149 }
4150
4151 return false;
4152}
4153
4154bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4155 SDValue &Imm, SDValue &Shift,
4156 bool Negate) {
4157 if (!isa<ConstantSDNode>(N))
4158 return false;
4159
4160 SDLoc DL(N);
4161 int64_t Val = cast<ConstantSDNode>(N)
4162 ->getAPIntValue()
4163 .trunc(VT.getFixedSizeInBits())
4164 .getSExtValue();
4165
4166 if (Negate)
4167 Val = -Val;
4168
4169 // Signed saturating instructions treat their immediate operand as unsigned,
4170 // whereas the related intrinsics define their operands to be signed. This
4171 // means we can only use the immediate form when the operand is non-negative.
4172 if (Val < 0)
4173 return false;
4174
4175 switch (VT.SimpleTy) {
4176 case MVT::i8:
4177 // All positive immediates are supported.
4178 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4179 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4180 return true;
4181 case MVT::i16:
4182 case MVT::i32:
4183 case MVT::i64:
4184 // Support 8bit positive immediates.
4185 if (Val <= 255) {
4186 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4187 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4188 return true;
4189 }
4190 // Support 16bit positive immediates that are a multiple of 256.
4191 if (Val <= 65280 && Val % 256 == 0) {
4192 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4193 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4194 return true;
4195 }
4196 break;
4197 default:
4198 break;
4199 }
4200
4201 return false;
4202}
4203
4204bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4205 SDValue &Shift) {
4206 if (!isa<ConstantSDNode>(N))
4207 return false;
4208
4209 SDLoc DL(N);
4210 int64_t Val = cast<ConstantSDNode>(N)
4211 ->getAPIntValue()
4212 .trunc(VT.getFixedSizeInBits())
4213 .getSExtValue();
4214
4215 switch (VT.SimpleTy) {
4216 case MVT::i8:
4217 // All immediates are supported.
4218 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4219 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4220 return true;
4221 case MVT::i16:
4222 case MVT::i32:
4223 case MVT::i64:
4224 // Support 8bit signed immediates.
4225 if (Val >= -128 && Val <= 127) {
4226 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4227 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4228 return true;
4229 }
4230 // Support 16bit signed immediates that are a multiple of 256.
4231 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4232 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4233 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4234 return true;
4235 }
4236 break;
4237 default:
4238 break;
4239 }
4240
4241 return false;
4242}
4243
4244bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4245 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4246 int64_t ImmVal = CNode->getSExtValue();
4247 SDLoc DL(N);
4248 if (ImmVal >= -128 && ImmVal < 128) {
4249 Imm = CurDAG->getSignedConstant(ImmVal, DL, MVT::i32, /*isTarget=*/true);
4250 return true;
4251 }
4252 }
4253 return false;
4254}
4255
4256bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4257 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4258 uint64_t ImmVal = CNode->getZExtValue();
4259
4260 switch (VT.SimpleTy) {
4261 case MVT::i8:
4262 ImmVal &= 0xFF;
4263 break;
4264 case MVT::i16:
4265 ImmVal &= 0xFFFF;
4266 break;
4267 case MVT::i32:
4268 ImmVal &= 0xFFFFFFFF;
4269 break;
4270 case MVT::i64:
4271 break;
4272 default:
4273 llvm_unreachable("Unexpected type");
4274 }
4275
4276 if (ImmVal < 256) {
4277 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4278 return true;
4279 }
4280 }
4281 return false;
4282}
4283
4284bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4285 bool Invert) {
4286 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4287 uint64_t ImmVal = CNode->getZExtValue();
4288 SDLoc DL(N);
4289
4290 if (Invert)
4291 ImmVal = ~ImmVal;
4292
4293 // Shift mask depending on type size.
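     // For example, an i16 element value 0x00f0 is replicated to
     // 0x00f000f000f000f0 before being tested as a 64-bit logical immediate.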
4294 switch (VT.SimpleTy) {
4295 case MVT::i8:
4296 ImmVal &= 0xFF;
4297 ImmVal |= ImmVal << 8;
4298 ImmVal |= ImmVal << 16;
4299 ImmVal |= ImmVal << 32;
4300 break;
4301 case MVT::i16:
4302 ImmVal &= 0xFFFF;
4303 ImmVal |= ImmVal << 16;
4304 ImmVal |= ImmVal << 32;
4305 break;
4306 case MVT::i32:
4307 ImmVal &= 0xFFFFFFFF;
4308 ImmVal |= ImmVal << 32;
4309 break;
4310 case MVT::i64:
4311 break;
4312 default:
4313 llvm_unreachable("Unexpected type");
4314 }
4315
4316 uint64_t encoding;
4317 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4318 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4319 return true;
4320 }
4321 }
4322 return false;
4323}
4324
4325// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4326 // Rather than attempt to normalise everything, we can sometimes saturate the
4327// shift amount during selection. This function also allows for consistent
4328// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4329// required by the instructions.
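 // For example, with Low == 1 and High == 16, a requested shift amount of 20
 // saturates to 16 when AllowSaturation is true and is otherwise rejected.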
4330bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4331 uint64_t High, bool AllowSaturation,
4332 SDValue &Imm) {
4333 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4334 uint64_t ImmVal = CN->getZExtValue();
4335
4336 // Reject shift amounts that are too small.
4337 if (ImmVal < Low)
4338 return false;
4339
4340 // Reject or saturate shift amounts that are too big.
4341 if (ImmVal > High) {
4342 if (!AllowSaturation)
4343 return false;
4344 ImmVal = High;
4345 }
4346
4347 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4348 return true;
4349 }
4350
4351 return false;
4352}
4353
4354bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4355 // tagp(FrameIndex, IRGstack, tag_offset):
4356 // since the offset between FrameIndex and IRGstack is a compile-time
4357 // constant, this can be lowered to a single ADDG instruction.
4358 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4359 return false;
4360 }
4361
4362 SDValue IRG_SP = N->getOperand(2);
4363 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4364 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4365 return false;
4366 }
4367
4368 const TargetLowering *TLI = getTargetLowering();
4369 SDLoc DL(N);
4370 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4371 SDValue FiOp = CurDAG->getTargetFrameIndex(
4372 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4373 int TagOffset = N->getConstantOperandVal(3);
4374
4375 SDNode *Out = CurDAG->getMachineNode(
4376 AArch64::TAGPstack, DL, MVT::i64,
4377 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4378 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4379 ReplaceNode(N, Out);
4380 return true;
4381}
4382
4383void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4384 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4385 "llvm.aarch64.tagp third argument must be an immediate");
4386 if (trySelectStackSlotTagP(N))
4387 return;
4388 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4389 // compile-time constant, not just for stack allocations.
4390
4391 // General case for unrelated pointers in Op1 and Op2.
4392 SDLoc DL(N);
4393 int TagOffset = N->getConstantOperandVal(3);
4394 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4395 {N->getOperand(1), N->getOperand(2)});
4396 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4397 {SDValue(N1, 0), N->getOperand(2)});
4398 SDNode *N3 = CurDAG->getMachineNode(
4399 AArch64::ADDG, DL, MVT::i64,
4400 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4401 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4402 ReplaceNode(N, N3);
4403}
4404
4405bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4406 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4407
4408 // Bail when not a "cast" like insert_subvector.
4409 if (N->getConstantOperandVal(2) != 0)
4410 return false;
4411 if (!N->getOperand(0).isUndef())
4412 return false;
4413
4414 // Bail when normal isel should do the job.
4415 EVT VT = N->getValueType(0);
4416 EVT InVT = N->getOperand(1).getValueType();
4417 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4418 return false;
4419 if (InVT.getSizeInBits() <= 128)
4420 return false;
4421
4422 // NOTE: We can only get here when doing fixed length SVE code generation.
4423 // We do manual selection because the types involved are not linked to real
4424 // registers (despite being legal) and must be coerced into SVE registers.
4425
4427 "Expected to insert into a packed scalable vector!");
4428
4429 SDLoc DL(N);
4430 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4431 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4432 N->getOperand(1), RC));
4433 return true;
4434}
4435
4436bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4437 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4438
4439 // Bail when not a "cast" like extract_subvector.
4440 if (N->getConstantOperandVal(1) != 0)
4441 return false;
4442
4443 // Bail when normal isel can do the job.
4444 EVT VT = N->getValueType(0);
4445 EVT InVT = N->getOperand(0).getValueType();
4446 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4447 return false;
4448 if (VT.getSizeInBits() <= 128)
4449 return false;
4450
4451 // NOTE: We can only get here when doing fixed length SVE code generation.
4452 // We do manual selection because the types involved are not linked to real
4453 // registers (despite being legal) and must be coerced into SVE registers.
4454
4456 "Expected to extract from a packed scalable vector!");
4457
4458 SDLoc DL(N);
4459 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4460 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4461 N->getOperand(0), RC));
4462 return true;
4463}
4464
4465bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4466 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4467
4468 SDValue N0 = N->getOperand(0);
4469 SDValue N1 = N->getOperand(1);
4470 EVT VT = N->getValueType(0);
4471
4472 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4473   // A rotate by a constant is a funnel shift in IR, which is expanded to
4474   // an OR with shifted operands.
4475 // We do the following transform:
4476 // OR N0, N1 -> xar (x, y, imm)
4477 // Where:
4478 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4479 // N0 = SHL_PRED true, V, splat(bits-imm)
4480 // V = (xor x, y)
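   // For example, for nxv4i32, a left shift by 24 on N0 and a right shift by 8
   // on N1 (24 + 8 == 32) selects XAR_ZZZI_S with immediate 8.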
4481 if (VT.isScalableVector() &&
4482 (Subtarget->hasSVE2() ||
4483 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4484 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4485         N1.getOpcode() != AArch64ISD::SRL_PRED)
4486       std::swap(N0, N1);
4487 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4488         N1.getOpcode() != AArch64ISD::SRL_PRED)
4489       return false;
4490
4491 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4492 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4493 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4494 return false;
4495
4496 SDValue XOR = N0.getOperand(1);
4497 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4498 return false;
4499
4500 APInt ShlAmt, ShrAmt;
4501 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4502         !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4503       return false;
4504
4505 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4506 return false;
4507
4508 SDLoc DL(N);
4509 SDValue Imm =
4510 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4511
4512 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4513 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4514 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4515 AArch64::XAR_ZZZI_D})) {
4516 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4517 return true;
4518 }
4519 return false;
4520 }
4521
4522 if (!Subtarget->hasSHA3())
4523 return false;
4524
4525 if (N0->getOpcode() != AArch64ISD::VSHL ||
4526       N1->getOpcode() != AArch64ISD::VLSHR)
4527     return false;
4528
4529 if (N0->getOperand(0) != N1->getOperand(0) ||
4530 N1->getOperand(0)->getOpcode() != ISD::XOR)
4531 return false;
4532
4533 SDValue XOR = N0.getOperand(0);
4534 SDValue R1 = XOR.getOperand(0);
4535 SDValue R2 = XOR.getOperand(1);
4536
4537 unsigned HsAmt = N0.getConstantOperandVal(1);
4538 unsigned ShAmt = N1.getConstantOperandVal(1);
4539
4540 SDLoc DL = SDLoc(N0.getOperand(1));
4541 SDValue Imm = CurDAG->getTargetConstant(
4542 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4543
4544 if (ShAmt + HsAmt != 64)
4545 return false;
4546
4547 SDValue Ops[] = {R1, R2, Imm};
4548 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4549
4550 return true;
4551}
4552
4553void AArch64DAGToDAGISel::Select(SDNode *Node) {
4554 // If we have a custom node, we already have selected!
4555 if (Node->isMachineOpcode()) {
4556 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4557 Node->setNodeId(-1);
4558 return;
4559 }
4560
4561   // A few cases need custom selection.
4562 EVT VT = Node->getValueType(0);
4563
4564 switch (Node->getOpcode()) {
4565 default:
4566 break;
4567
4568   case ISD::ATOMIC_CMP_SWAP:
4569     if (SelectCMP_SWAP(Node))
4570 return;
4571 break;
4572
4573 case ISD::READ_REGISTER:
4574 case AArch64ISD::MRRS:
4575 if (tryReadRegister(Node))
4576 return;
4577 break;
4578
4579   case ISD::WRITE_REGISTER:
4580   case AArch64ISD::MSRR:
4581 if (tryWriteRegister(Node))
4582 return;
4583 break;
4584
4585 case ISD::LOAD: {
4586 // Try to select as an indexed load. Fall through to normal processing
4587 // if we can't.
4588 if (tryIndexedLoad(Node))
4589 return;
4590 break;
4591 }
4592
4593 case ISD::SRL:
4594 case ISD::AND:
4595 case ISD::SRA:
4596   case ISD::SIGN_EXTEND_INREG:
4597     if (tryBitfieldExtractOp(Node))
4598 return;
4599 if (tryBitfieldInsertInZeroOp(Node))
4600 return;
4601 [[fallthrough]];
4602 case ISD::ROTR:
4603 case ISD::SHL:
4604 if (tryShiftAmountMod(Node))
4605 return;
4606 break;
4607
4608 case ISD::SIGN_EXTEND:
4609 if (tryBitfieldExtractOpFromSExt(Node))
4610 return;
4611 break;
4612
4613 case ISD::OR:
4614 if (tryBitfieldInsertOp(Node))
4615 return;
4616 if (trySelectXAR(Node))
4617 return;
4618 break;
4619
4620   case ISD::EXTRACT_SUBVECTOR: {
4621     if (trySelectCastScalableToFixedLengthVector(Node))
4622 return;
4623 break;
4624 }
4625
4626 case ISD::INSERT_SUBVECTOR: {
4627 if (trySelectCastFixedLengthToScalableVector(Node))
4628 return;
4629 break;
4630 }
4631
4632 case ISD::Constant: {
4633 // Materialize zero constants as copies from WZR/XZR. This allows
4634 // the coalescer to propagate these into other instructions.
4635 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4636 if (ConstNode->isZero()) {
4637 if (VT == MVT::i32) {
4638 SDValue New = CurDAG->getCopyFromReg(
4639 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4640 ReplaceNode(Node, New.getNode());
4641 return;
4642 } else if (VT == MVT::i64) {
4643 SDValue New = CurDAG->getCopyFromReg(
4644 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4645 ReplaceNode(Node, New.getNode());
4646 return;
4647 }
4648 }
4649 break;
4650 }
4651
4652 case ISD::FrameIndex: {
4653 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4654 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4655 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4656 const TargetLowering *TLI = getTargetLowering();
4657 SDValue TFI = CurDAG->getTargetFrameIndex(
4658 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4659 SDLoc DL(Node);
4660 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4661 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4662 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4663 return;
4664 }
4665   case ISD::INTRINSIC_W_CHAIN: {
4666     unsigned IntNo = Node->getConstantOperandVal(1);
4667 switch (IntNo) {
4668 default:
4669 break;
4670 case Intrinsic::aarch64_gcsss: {
4671 SDLoc DL(Node);
4672 SDValue Chain = Node->getOperand(0);
4673 SDValue Val = Node->getOperand(2);
4674 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4675 SDNode *SS1 =
4676 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4677 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4678 MVT::Other, Zero, SDValue(SS1, 0));
4679 ReplaceNode(Node, SS2);
4680 return;
4681 }
4682 case Intrinsic::aarch64_ldaxp:
4683 case Intrinsic::aarch64_ldxp: {
4684 unsigned Op =
4685 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4686 SDValue MemAddr = Node->getOperand(2);
4687 SDLoc DL(Node);
4688 SDValue Chain = Node->getOperand(0);
4689
4690 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4691 MVT::Other, MemAddr, Chain);
4692
4693 // Transfer memoperands.
4694       MachineMemOperand *MemOp =
4695           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4696 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4697 ReplaceNode(Node, Ld);
4698 return;
4699 }
4700 case Intrinsic::aarch64_stlxp:
4701 case Intrinsic::aarch64_stxp: {
4702 unsigned Op =
4703 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4704 SDLoc DL(Node);
4705 SDValue Chain = Node->getOperand(0);
4706 SDValue ValLo = Node->getOperand(2);
4707 SDValue ValHi = Node->getOperand(3);
4708 SDValue MemAddr = Node->getOperand(4);
4709
4710 // Place arguments in the right order.
4711 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4712
4713 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4714 // Transfer memoperands.
4715       MachineMemOperand *MemOp =
4716           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4717 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4718
4719 ReplaceNode(Node, St);
4720 return;
4721 }
4722 case Intrinsic::aarch64_neon_ld1x2:
4723 if (VT == MVT::v8i8) {
4724 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4725 return;
4726 } else if (VT == MVT::v16i8) {
4727 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4728 return;
4729 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4730 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4731 return;
4732 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4733 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4734 return;
4735 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4736 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4737 return;
4738 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4739 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4740 return;
4741 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4742 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4743 return;
4744 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4745 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4746 return;
4747 }
4748 break;
4749 case Intrinsic::aarch64_neon_ld1x3:
4750 if (VT == MVT::v8i8) {
4751 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4752 return;
4753 } else if (VT == MVT::v16i8) {
4754 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4755 return;
4756 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4757 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4758 return;
4759 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4760 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4761 return;
4762 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4763 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4764 return;
4765 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4766 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4767 return;
4768 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4769 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4770 return;
4771 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4772 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4773 return;
4774 }
4775 break;
4776 case Intrinsic::aarch64_neon_ld1x4:
4777 if (VT == MVT::v8i8) {
4778 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4779 return;
4780 } else if (VT == MVT::v16i8) {
4781 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4782 return;
4783 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4784 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4785 return;
4786 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4787 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4788 return;
4789 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4790 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4791 return;
4792 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4793 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4794 return;
4795 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4796 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4797 return;
4798 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4799 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4800 return;
4801 }
4802 break;
4803 case Intrinsic::aarch64_neon_ld2:
4804 if (VT == MVT::v8i8) {
4805 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4806 return;
4807 } else if (VT == MVT::v16i8) {
4808 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4809 return;
4810 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4811 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4812 return;
4813 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4814 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4815 return;
4816 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4817 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4818 return;
4819 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4820 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4821 return;
4822 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4823 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4824 return;
4825 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4826 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4827 return;
4828 }
4829 break;
4830 case Intrinsic::aarch64_neon_ld3:
4831 if (VT == MVT::v8i8) {
4832 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4833 return;
4834 } else if (VT == MVT::v16i8) {
4835 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4836 return;
4837 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4838 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4839 return;
4840 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4841 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4842 return;
4843 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4844 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4845 return;
4846 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4847 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4848 return;
4849 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4850 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4851 return;
4852 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4853 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4854 return;
4855 }
4856 break;
4857 case Intrinsic::aarch64_neon_ld4:
4858 if (VT == MVT::v8i8) {
4859 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4860 return;
4861 } else if (VT == MVT::v16i8) {
4862 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4863 return;
4864 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4865 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4866 return;
4867 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4868 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4869 return;
4870 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4871 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4872 return;
4873 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4874 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4875 return;
4876 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4877 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4878 return;
4879 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4880 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4881 return;
4882 }
4883 break;
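    // LD2R/LD3R/LD4R: load one structure and replicate it to every lane of each destination register.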
4884 case Intrinsic::aarch64_neon_ld2r:
4885 if (VT == MVT::v8i8) {
4886 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4887 return;
4888 } else if (VT == MVT::v16i8) {
4889 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4890 return;
4891 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4892 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4893 return;
4894 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4895 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4896 return;
4897 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4898 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4899 return;
4900 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4901 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4902 return;
4903 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4904 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4905 return;
4906 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4907 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4908 return;
4909 }
4910 break;
4911 case Intrinsic::aarch64_neon_ld3r:
4912 if (VT == MVT::v8i8) {
4913 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4914 return;
4915 } else if (VT == MVT::v16i8) {
4916 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4917 return;
4918 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4919 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4920 return;
4921 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4922 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4923 return;
4924 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4925 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4926 return;
4927 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4928 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4929 return;
4930 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4931 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4932 return;
4933 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4934 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4935 return;
4936 }
4937 break;
4938 case Intrinsic::aarch64_neon_ld4r:
4939 if (VT == MVT::v8i8) {
4940 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4941 return;
4942 } else if (VT == MVT::v16i8) {
4943 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4944 return;
4945 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4946 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4947 return;
4948 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4949 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4950 return;
4951 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4952 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4953 return;
4954 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4955 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4956 return;
4957 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4958 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4959 return;
4960 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4961 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4962 return;
4963 }
4964 break;
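    // Lane loads (LD2/LD3/LD4 to a single lane): only the element size matters, so the 64-bit and 128-bit vector types of each element width share one opcode.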
4965 case Intrinsic::aarch64_neon_ld2lane:
4966 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4967 SelectLoadLane(Node, 2, AArch64::LD2i8);
4968 return;
4969 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4970 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4971 SelectLoadLane(Node, 2, AArch64::LD2i16);
4972 return;
4973 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4974 VT == MVT::v2f32) {
4975 SelectLoadLane(Node, 2, AArch64::LD2i32);
4976 return;
4977 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4978 VT == MVT::v1f64) {
4979 SelectLoadLane(Node, 2, AArch64::LD2i64);
4980 return;
4981 }
4982 break;
4983 case Intrinsic::aarch64_neon_ld3lane:
4984 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4985 SelectLoadLane(Node, 3, AArch64::LD3i8);
4986 return;
4987 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4988 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4989 SelectLoadLane(Node, 3, AArch64::LD3i16);
4990 return;
4991 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4992 VT == MVT::v2f32) {
4993 SelectLoadLane(Node, 3, AArch64::LD3i32);
4994 return;
4995 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4996 VT == MVT::v1f64) {
4997 SelectLoadLane(Node, 3, AArch64::LD3i64);
4998 return;
4999 }
5000 break;
5001 case Intrinsic::aarch64_neon_ld4lane:
5002 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5003 SelectLoadLane(Node, 4, AArch64::LD4i8);
5004 return;
5005 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5006 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5007 SelectLoadLane(Node, 4, AArch64::LD4i16);
5008 return;
5009 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5010 VT == MVT::v2f32) {
5011 SelectLoadLane(Node, 4, AArch64::LD4i32);
5012 return;
5013 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5014 VT == MVT::v1f64) {
5015 SelectLoadLane(Node, 4, AArch64::LD4i64);
5016 return;
5017 }
5018 break;
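    // LD64B (FEAT_LS64): 64-byte load returning eight consecutive X registers.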
5019 case Intrinsic::aarch64_ld64b:
5020 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5021 return;
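    // SVE structured loads: each case passes both the immediate-offset (_IMM) and register-offset encodings; the _sret intrinsics return the loaded registers as a tuple.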
5022 case Intrinsic::aarch64_sve_ld2q_sret: {
5023 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5024 return;
5025 }
5026 case Intrinsic::aarch64_sve_ld3q_sret: {
5027 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5028 return;
5029 }
5030 case Intrinsic::aarch64_sve_ld4q_sret: {
5031 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5032 return;
5033 }
5034 case Intrinsic::aarch64_sve_ld2_sret: {
5035 if (VT == MVT::nxv16i8) {
5036 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5037 true);
5038 return;
5039 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5040 VT == MVT::nxv8bf16) {
5041 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5042 true);
5043 return;
5044 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5045 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5046 true);
5047 return;
5048 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5049 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5050 true);
5051 return;
5052 }
5053 break;
5054 }
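    // Multi-vector LD1 with a predicate-as-counter operand: SME2 targets select pseudo forms (expanded later), SVE2p1 targets the real instructions; anything else falls through to default selection.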
5055 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5056 if (VT == MVT::nxv16i8) {
5057 if (Subtarget->hasSME2())
5058 SelectContiguousMultiVectorLoad(
5059 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5060 else if (Subtarget->hasSVE2p1())
5061 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5062 AArch64::LD1B_2Z);
5063 else
5064 break;
5065 return;
5066 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5067 VT == MVT::nxv8bf16) {
5068 if (Subtarget->hasSME2())
5069 SelectContiguousMultiVectorLoad(
5070 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5071 else if (Subtarget->hasSVE2p1())
5072 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5073 AArch64::LD1H_2Z);
5074 else
5075 break;
5076 return;
5077 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5078 if (Subtarget->hasSME2())
5079 SelectContiguousMultiVectorLoad(
5080 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5081 else if (Subtarget->hasSVE2p1())
5082 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5083 AArch64::LD1W_2Z);
5084 else
5085 break;
5086 return;
5087 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5088 if (Subtarget->hasSME2())
5089 SelectContiguousMultiVectorLoad(
5090 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5091 else if (Subtarget->hasSVE2p1())
5092 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5093 AArch64::LD1D_2Z);
5094 else
5095 break;
5096 return;
5097 }
5098 break;
5099 }
5100 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5101 if (VT == MVT::nxv16i8) {
5102 if (Subtarget->hasSME2())
5103 SelectContiguousMultiVectorLoad(
5104 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5105 else if (Subtarget->hasSVE2p1())
5106 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5107 AArch64::LD1B_4Z);
5108 else
5109 break;
5110 return;
5111 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5112 VT == MVT::nxv8bf16) {
5113 if (Subtarget->hasSME2())
5114 SelectContiguousMultiVectorLoad(
5115 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5116 else if (Subtarget->hasSVE2p1())
5117 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5118 AArch64::LD1H_4Z);
5119 else
5120 break;
5121 return;
5122 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5123 if (Subtarget->hasSME2())
5124 SelectContiguousMultiVectorLoad(
5125 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5126 else if (Subtarget->hasSVE2p1())
5127 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5128 AArch64::LD1W_4Z);
5129 else
5130 break;
5131 return;
5132 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5133 if (Subtarget->hasSME2())
5134 SelectContiguousMultiVectorLoad(
5135 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5136 else if (Subtarget->hasSVE2p1())
5137 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5138 AArch64::LD1D_4Z);
5139 else
5140 break;
5141 return;
5142 }
5143 break;
5144 }
5145 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5146 if (VT == MVT::nxv16i8) {
5147 if (Subtarget->hasSME2())
5148 SelectContiguousMultiVectorLoad(Node, 2, 0,
5149 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5150 AArch64::LDNT1B_2Z_PSEUDO);
5151 else if (Subtarget->hasSVE2p1())
5152 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5153 AArch64::LDNT1B_2Z);
5154 else
5155 break;
5156 return;
5157 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5158 VT == MVT::nxv8bf16) {
5159 if (Subtarget->hasSME2())
5160 SelectContiguousMultiVectorLoad(Node, 2, 1,
5161 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5162 AArch64::LDNT1H_2Z_PSEUDO);
5163 else if (Subtarget->hasSVE2p1())
5164 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5165 AArch64::LDNT1H_2Z);
5166 else
5167 break;
5168 return;
5169 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5170 if (Subtarget->hasSME2())
5171 SelectContiguousMultiVectorLoad(Node, 2, 2,
5172 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5173 AArch64::LDNT1W_2Z_PSEUDO);
5174 else if (Subtarget->hasSVE2p1())
5175 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5176 AArch64::LDNT1W_2Z);
5177 else
5178 break;
5179 return;
5180 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5181 if (Subtarget->hasSME2())
5182 SelectContiguousMultiVectorLoad(Node, 2, 3,
5183 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5184 AArch64::LDNT1D_2Z_PSEUDO);
5185 else if (Subtarget->hasSVE2p1())
5186 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5187 AArch64::LDNT1D_2Z);
5188 else
5189 break;
5190 return;
5191 }
5192 break;
5193 }
5194 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5195 if (VT == MVT::nxv16i8) {
5196 if (Subtarget->hasSME2())
5197 SelectContiguousMultiVectorLoad(Node, 4, 0,
5198 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5199 AArch64::LDNT1B_4Z_PSEUDO);
5200 else if (Subtarget->hasSVE2p1())
5201 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5202 AArch64::LDNT1B_4Z);
5203 else
5204 break;
5205 return;
5206 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5207 VT == MVT::nxv8bf16) {
5208 if (Subtarget->hasSME2())
5209 SelectContiguousMultiVectorLoad(Node, 4, 1,
5210 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5211 AArch64::LDNT1H_4Z_PSEUDO);
5212 else if (Subtarget->hasSVE2p1())
5213 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5214 AArch64::LDNT1H_4Z);
5215 else
5216 break;
5217 return;
5218 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5219 if (Subtarget->hasSME2())
5220 SelectContiguousMultiVectorLoad(Node, 4, 2,
5221 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5222 AArch64::LDNT1W_4Z_PSEUDO);
5223 else if (Subtarget->hasSVE2p1())
5224 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5225 AArch64::LDNT1W_4Z);
5226 else
5227 break;
5228 return;
5229 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5230 if (Subtarget->hasSME2())
5231 SelectContiguousMultiVectorLoad(Node, 4, 3,
5232 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5233 AArch64::LDNT1D_4Z_PSEUDO);
5234 else if (Subtarget->hasSVE2p1())
5235 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5236 AArch64::LDNT1D_4Z);
5237 else
5238 break;
5239 return;
5240 }
5241 break;
5242 }
5243 case Intrinsic::aarch64_sve_ld3_sret: {
5244 if (VT == MVT::nxv16i8) {
5245 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5246 true);
5247 return;
5248 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5249 VT == MVT::nxv8bf16) {
5250 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5251 true);
5252 return;
5253 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5254 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5255 true);
5256 return;
5257 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5258 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5259 true);
5260 return;
5261 }
5262 break;
5263 }
5264 case Intrinsic::aarch64_sve_ld4_sret: {
5265 if (VT == MVT::nxv16i8) {
5266 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5267 true);
5268 return;
5269 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5270 VT == MVT::nxv8bf16) {
5271 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5272 true);
5273 return;
5274 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5275 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5276 true);
5277 return;
5278 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5279 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5280 true);
5281 return;
5282 }
5283 break;
5284 }
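    // SME ZA reads: MOVA copies horizontal or vertical tile slices into a group of two or four Z registers.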
5285 case Intrinsic::aarch64_sme_read_hor_vg2: {
5286 if (VT == MVT::nxv16i8) {
5287 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5288 AArch64::MOVA_2ZMXI_H_B);
5289 return;
5290 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5291 VT == MVT::nxv8bf16) {
5292 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5293 AArch64::MOVA_2ZMXI_H_H);
5294 return;
5295 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5296 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5297 AArch64::MOVA_2ZMXI_H_S);
5298 return;
5299 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5300 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5301 AArch64::MOVA_2ZMXI_H_D);
5302 return;
5303 }
5304 break;
5305 }
5306 case Intrinsic::aarch64_sme_read_ver_vg2: {
5307 if (VT == MVT::nxv16i8) {
5308 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5309 AArch64::MOVA_2ZMXI_V_B);
5310 return;
5311 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5312 VT == MVT::nxv8bf16) {
5313 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5314 AArch64::MOVA_2ZMXI_V_H);
5315 return;
5316 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5317 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5318 AArch64::MOVA_2ZMXI_V_S);
5319 return;
5320 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5321 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5322 AArch64::MOVA_2ZMXI_V_D);
5323 return;
5324 }
5325 break;
5326 }
5327 case Intrinsic::aarch64_sme_read_hor_vg4: {
5328 if (VT == MVT::nxv16i8) {
5329 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5330 AArch64::MOVA_4ZMXI_H_B);
5331 return;
5332 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5333 VT == MVT::nxv8bf16) {
5334 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5335 AArch64::MOVA_4ZMXI_H_H);
5336 return;
5337 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5338 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5339 AArch64::MOVA_4ZMXI_H_S);
5340 return;
5341 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5342 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5343 AArch64::MOVA_4ZMXI_H_D);
5344 return;
5345 }
5346 break;
5347 }
5348 case Intrinsic::aarch64_sme_read_ver_vg4: {
5349 if (VT == MVT::nxv16i8) {
5350 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5351 AArch64::MOVA_4ZMXI_V_B);
5352 return;
5353 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5354 VT == MVT::nxv8bf16) {
5355 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5356 AArch64::MOVA_4ZMXI_V_H);
5357 return;
5358 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5359 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5360 AArch64::MOVA_4ZMXI_V_S);
5361 return;
5362 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5363 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5364 AArch64::MOVA_4ZMXI_V_D);
5365 return;
5366 }
5367 break;
5368 }
5369 case Intrinsic::aarch64_sme_read_vg1x2: {
5370 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5371 AArch64::MOVA_VG2_2ZMXI);
5372 return;
5373 }
5374 case Intrinsic::aarch64_sme_read_vg1x4: {
5375 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5376 AArch64::MOVA_VG4_4ZMXI);
5377 return;
5378 }
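    // The MOVAZ ("read and zero") variants below additionally clear the ZA slices that were read.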
5379 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5380 if (VT == MVT::nxv16i8) {
5381 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5382 return;
5383 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5384 VT == MVT::nxv8bf16) {
5385 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5386 return;
5387 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5388 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5389 return;
5390 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5391 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5392 return;
5393 }
5394 break;
5395 }
5396 case Intrinsic::aarch64_sme_readz_vert_x2: {
5397 if (VT == MVT::nxv16i8) {
5398 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5399 return;
5400 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5401 VT == MVT::nxv8bf16) {
5402 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5403 return;
5404 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5405 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5406 return;
5407 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5408 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5409 return;
5410 }
5411 break;
5412 }
5413 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5414 if (VT == MVT::nxv16i8) {
5415 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5416 return;
5417 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5418 VT == MVT::nxv8bf16) {
5419 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5420 return;
5421 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5422 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5423 return;
5424 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5425 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5426 return;
5427 }
5428 break;
5429 }
5430 case Intrinsic::aarch64_sme_readz_vert_x4: {
5431 if (VT == MVT::nxv16i8) {
5432 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5433 return;
5434 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5435 VT == MVT::nxv8bf16) {
5436 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5437 return;
5438 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5439 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5440 return;
5441 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5442 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5443 return;
5444 }
5445 break;
5446 }
5447 case Intrinsic::aarch64_sme_readz_x2: {
5448 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5449 AArch64::ZA);
5450 return;
5451 }
5452 case Intrinsic::aarch64_sme_readz_x4: {
5453 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5454 AArch64::ZA);
5455 return;
5456 }
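    // swift_async_context_addr: the Swift async context slot lives at FP - 8, so materialise that address directly.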
5457 case Intrinsic::swift_async_context_addr: {
5458 SDLoc DL(Node);
5459 SDValue Chain = Node->getOperand(0);
5460 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5461 SDValue Res = SDValue(
5462 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5463 CurDAG->getTargetConstant(8, DL, MVT::i32),
5464 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5465 0);
5466 ReplaceUses(SDValue(Node, 0), Res);
5467 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5468 CurDAG->RemoveDeadNode(Node);
5469
5470 auto &MF = CurDAG->getMachineFunction();
5471 MF.getFrameInfo().setFrameAddressIsTaken(true);
5472 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5473 return;
5474 }
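    // SME2 LUTI2/LUTI4 lookups index into the ZT0 table register; the trailing argument is the largest legal index immediate (see the per-case comments).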
5475 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5476 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5477 Node->getValueType(0),
5478 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5479 AArch64::LUTI2_4ZTZI_S}))
5480 // Second Immediate must be <= 3:
5481 SelectMultiVectorLuti(Node, 4, Opc, 3);
5482 return;
5483 }
5484 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5485 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5486 Node->getValueType(0),
5487 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5488 // Second Immediate must be <= 1:
5489 SelectMultiVectorLuti(Node, 4, Opc, 1);
5490 return;
5491 }
5492 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5493 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5494 Node->getValueType(0),
5495 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5496 AArch64::LUTI2_2ZTZI_S}))
5497 // Second Immediate must be <= 7:
5498 SelectMultiVectorLuti(Node, 2, Opc, 7);
5499 return;
5500 }
5501 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5502 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5503 Node->getValueType(0),
5504 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5505 AArch64::LUTI4_2ZTZI_S}))
5506 // Second Immediate must be <= 3:
5507 SelectMultiVectorLuti(Node, 2, Opc, 3);
5508 return;
5509 }
5510 }
5511 } break;
5512 case ISD::INTRINSIC_WO_CHAIN: {
5513 unsigned IntNo = Node->getConstantOperandVal(0);
5514 switch (IntNo) {
5515 default:
5516 break;
5517 case Intrinsic::aarch64_tagp:
5518 SelectTagP(Node);
5519 return;
5520
5521 case Intrinsic::ptrauth_auth:
5522 SelectPtrauthAuth(Node);
5523 return;
5524
5525 case Intrinsic::ptrauth_resign:
5526 SelectPtrauthResign(Node);
5527 return;
5528
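    // NEON TBL/TBX table lookups: the opcode depends only on whether the vectors are 64-bit (v8i8) or 128-bit (v16i8); TBX preserves the destination element when an index is out of range.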
5529 case Intrinsic::aarch64_neon_tbl2:
5530 SelectTable(Node, 2,
5531 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5532 false);
5533 return;
5534 case Intrinsic::aarch64_neon_tbl3:
5535 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5536 : AArch64::TBLv16i8Three,
5537 false);
5538 return;
5539 case Intrinsic::aarch64_neon_tbl4:
5540 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5541 : AArch64::TBLv16i8Four,
5542 false);
5543 return;
5544 case Intrinsic::aarch64_neon_tbx2:
5545 SelectTable(Node, 2,
5546 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5547 true);
5548 return;
5549 case Intrinsic::aarch64_neon_tbx3:
5550 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5551 : AArch64::TBXv16i8Three,
5552 true);
5553 return;
5554 case Intrinsic::aarch64_neon_tbx4:
5555 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5556 : AArch64::TBXv16i8Four,
5557 true);
5558 return;
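    // SVE2p1/SME2 multi-vector intrinsics: the opcode is chosen from the element type. Integer opcode lists are ordered B/H/S/D and floating-point lists BF16/F16/F32/F64; a 0 entry marks an element type with no matching instruction.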
5559 case Intrinsic::aarch64_sve_srshl_single_x2:
5560 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5561 Node->getValueType(0),
5562 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5563 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5564 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5565 return;
5566 case Intrinsic::aarch64_sve_srshl_single_x4:
5567 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5568 Node->getValueType(0),
5569 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5570 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5571 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5572 return;
5573 case Intrinsic::aarch64_sve_urshl_single_x2:
5574 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5575 Node->getValueType(0),
5576 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5577 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5578 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5579 return;
5580 case Intrinsic::aarch64_sve_urshl_single_x4:
5581 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5582 Node->getValueType(0),
5583 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5584 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5585 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5586 return;
5587 case Intrinsic::aarch64_sve_srshl_x2:
5588 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5589 Node->getValueType(0),
5590 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5591 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5592 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5593 return;
5594 case Intrinsic::aarch64_sve_srshl_x4:
5595 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5596 Node->getValueType(0),
5597 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5598 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5599 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5600 return;
5601 case Intrinsic::aarch64_sve_urshl_x2:
5602 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5603 Node->getValueType(0),
5604 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5605 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5606 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5607 return;
5608 case Intrinsic::aarch64_sve_urshl_x4:
5609 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5610 Node->getValueType(0),
5611 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5612 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5613 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5614 return;
5615 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5616 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5617 Node->getValueType(0),
5618 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5619 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5620 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5621 return;
5622 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5623 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5624 Node->getValueType(0),
5625 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5626 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5627 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5628 return;
5629 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5630 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5631 Node->getValueType(0),
5632 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5633 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5634 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5635 return;
5636 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5637 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5638 Node->getValueType(0),
5639 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5640 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5641 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5642 return;
5643 case Intrinsic::aarch64_sve_whilege_x2:
5644 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5645 Node->getValueType(0),
5646 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5647 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5648 SelectWhilePair(Node, Op);
5649 return;
5650 case Intrinsic::aarch64_sve_whilegt_x2:
5651 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5652 Node->getValueType(0),
5653 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5654 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5655 SelectWhilePair(Node, Op);
5656 return;
5657 case Intrinsic::aarch64_sve_whilehi_x2:
5658 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5659 Node->getValueType(0),
5660 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5661 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5662 SelectWhilePair(Node, Op);
5663 return;
5664 case Intrinsic::aarch64_sve_whilehs_x2:
5665 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5666 Node->getValueType(0),
5667 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5668 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5669 SelectWhilePair(Node, Op);
5670 return;
5671 case Intrinsic::aarch64_sve_whilele_x2:
5672 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5673 Node->getValueType(0),
5674 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5675 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5676 SelectWhilePair(Node, Op);
5677 return;
5678 case Intrinsic::aarch64_sve_whilelo_x2:
5679 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5680 Node->getValueType(0),
5681 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5682 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5683 SelectWhilePair(Node, Op);
5684 return;
5685 case Intrinsic::aarch64_sve_whilels_x2:
5686 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5687 Node->getValueType(0),
5688 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5689 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5690 SelectWhilePair(Node, Op);
5691 return;
5692 case Intrinsic::aarch64_sve_whilelt_x2:
5693 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5694 Node->getValueType(0),
5695 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5696 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5697 SelectWhilePair(Node, Op);
5698 return;
5699 case Intrinsic::aarch64_sve_smax_single_x2:
5700 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5701 Node->getValueType(0),
5702 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5703 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5704 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5705 return;
5706 case Intrinsic::aarch64_sve_umax_single_x2:
5707 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5708 Node->getValueType(0),
5709 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5710 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5711 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5712 return;
5713 case Intrinsic::aarch64_sve_fmax_single_x2:
5714 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5715 Node->getValueType(0),
5716 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5717 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5718 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5719 return;
5720 case Intrinsic::aarch64_sve_smax_single_x4:
5721 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5722 Node->getValueType(0),
5723 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5724 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5725 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5726 return;
5727 case Intrinsic::aarch64_sve_umax_single_x4:
5728 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5729 Node->getValueType(0),
5730 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5731 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5732 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5733 return;
5734 case Intrinsic::aarch64_sve_fmax_single_x4:
5735 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5736 Node->getValueType(0),
5737 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5738 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5739 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5740 return;
5741 case Intrinsic::aarch64_sve_smin_single_x2:
5742 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5743 Node->getValueType(0),
5744 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5745 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5746 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5747 return;
5748 case Intrinsic::aarch64_sve_umin_single_x2:
5749 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5750 Node->getValueType(0),
5751 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5752 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5753 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5754 return;
5755 case Intrinsic::aarch64_sve_fmin_single_x2:
5756 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5757 Node->getValueType(0),
5758 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5759 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5760 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5761 return;
5762 case Intrinsic::aarch64_sve_smin_single_x4:
5763 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5764 Node->getValueType(0),
5765 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5766 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5767 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5768 return;
5769 case Intrinsic::aarch64_sve_umin_single_x4:
5770 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5771 Node->getValueType(0),
5772 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5773 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5774 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5775 return;
5776 case Intrinsic::aarch64_sve_fmin_single_x4:
5777 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5778 Node->getValueType(0),
5779 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5780 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5781 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5782 return;
5783 case Intrinsic::aarch64_sve_smax_x2:
5784 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5785 Node->getValueType(0),
5786 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5787 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5788 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5789 return;
5790 case Intrinsic::aarch64_sve_umax_x2:
5791 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5792 Node->getValueType(0),
5793 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5794 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5795 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5796 return;
5797 case Intrinsic::aarch64_sve_fmax_x2:
5798 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5799 Node->getValueType(0),
5800 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5801 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5802 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5803 return;
5804 case Intrinsic::aarch64_sve_smax_x4:
5805 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5806 Node->getValueType(0),
5807 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5808 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5809 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5810 return;
5811 case Intrinsic::aarch64_sve_umax_x4:
5812 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5813 Node->getValueType(0),
5814 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5815 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5816 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5817 return;
5818 case Intrinsic::aarch64_sve_fmax_x4:
5819 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5820 Node->getValueType(0),
5821 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5822 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5823 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5824 return;
5825 case Intrinsic::aarch64_sve_smin_x2:
5826 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5827 Node->getValueType(0),
5828 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5829 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5830 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5831 return;
5832 case Intrinsic::aarch64_sve_umin_x2:
5833 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5834 Node->getValueType(0),
5835 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5836 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5837 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5838 return;
5839 case Intrinsic::aarch64_sve_fmin_x2:
5840 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5841 Node->getValueType(0),
5842 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5843 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5844 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5845 return;
5846 case Intrinsic::aarch64_sve_smin_x4:
5847 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5848 Node->getValueType(0),
5849 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5850 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5851 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5852 return;
5853 case Intrinsic::aarch64_sve_umin_x4:
5854 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5855 Node->getValueType(0),
5856 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5857 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5858 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5859 return;
5860 case Intrinsic::aarch64_sve_fmin_x4:
5861 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5862 Node->getValueType(0),
5863 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5864 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5865 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5866 return;
5867 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
5868 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5869 Node->getValueType(0),
5870 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5871 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5872 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5873 return;
5874 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
5875 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5876 Node->getValueType(0),
5877 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5878 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5879 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5880 return;
5881 case Intrinsic::aarch64_sve_fminnm_single_x2:
5882 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5883 Node->getValueType(0),
5884 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5885 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5886 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5887 return;
5888 case Intrinsic::aarch64_sve_fminnm_single_x4:
5889 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5890 Node->getValueType(0),
5891 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5892 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5893 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5894 return;
5895 case Intrinsic::aarch64_sve_fmaxnm_x2:
5896 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5897 Node->getValueType(0),
5898 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5899 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5900 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5901 return;
5902 case Intrinsic::aarch64_sve_fmaxnm_x4:
5903 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5904 Node->getValueType(0),
5905 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5906 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5907 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5908 return;
5909 case Intrinsic::aarch64_sve_fminnm_x2:
5910 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5911 Node->getValueType(0),
5912 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5913 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5914 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5915 return;
5916 case Intrinsic::aarch64_sve_fminnm_x4:
5917 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5918 Node->getValueType(0),
5919 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5920 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5921 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5922 return;
5923 case Intrinsic::aarch64_sve_fcvtzs_x2:
5924 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5925 return;
5926 case Intrinsic::aarch64_sve_scvtf_x2:
5927 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5928 return;
5929 case Intrinsic::aarch64_sve_fcvtzu_x2:
5930 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5931 return;
5932 case Intrinsic::aarch64_sve_ucvtf_x2:
5933 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5934 return;
5935 case Intrinsic::aarch64_sve_fcvtzs_x4:
5936 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5937 return;
5938 case Intrinsic::aarch64_sve_scvtf_x4:
5939 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5940 return;
5941 case Intrinsic::aarch64_sve_fcvtzu_x4:
5942 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5943 return;
5944 case Intrinsic::aarch64_sve_ucvtf_x4:
5945 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5946 return;
5947 case Intrinsic::aarch64_sve_fcvt_widen_x2:
5948 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
5949 return;
5950 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5951 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
5952 return;
5953 case Intrinsic::aarch64_sve_sclamp_single_x2:
5954 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5955 Node->getValueType(0),
5956 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5957 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5958 SelectClamp(Node, 2, Op);
5959 return;
5960 case Intrinsic::aarch64_sve_uclamp_single_x2:
5961 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5962 Node->getValueType(0),
5963 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5964 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5965 SelectClamp(Node, 2, Op);
5966 return;
5967 case Intrinsic::aarch64_sve_fclamp_single_x2:
5968 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5969 Node->getValueType(0),
5970 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5971 AArch64::FCLAMP_VG2_2Z2Z_D}))
5972 SelectClamp(Node, 2, Op);
5973 return;
5974 case Intrinsic::aarch64_sve_bfclamp_single_x2:
5975 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
5976 return;
5977 case Intrinsic::aarch64_sve_sclamp_single_x4:
5978 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5979 Node->getValueType(0),
5980 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5981 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5982 SelectClamp(Node, 4, Op);
5983 return;
5984 case Intrinsic::aarch64_sve_uclamp_single_x4:
5985 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5986 Node->getValueType(0),
5987 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5988 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5989 SelectClamp(Node, 4, Op);
5990 return;
5991 case Intrinsic::aarch64_sve_fclamp_single_x4:
5992 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5993 Node->getValueType(0),
5994 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5995 AArch64::FCLAMP_VG4_4Z4Z_D}))
5996 SelectClamp(Node, 4, Op);
5997 return;
5998 case Intrinsic::aarch64_sve_bfclamp_single_x4:
5999 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6000 return;
6001 case Intrinsic::aarch64_sve_add_single_x2:
6002 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6003 Node->getValueType(0),
6004 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6005 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6006 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6007 return;
6008 case Intrinsic::aarch64_sve_add_single_x4:
6009 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6010 Node->getValueType(0),
6011 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6012 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6013 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6014 return;
6015 case Intrinsic::aarch64_sve_zip_x2:
6016 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6017 Node->getValueType(0),
6018 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6019 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6020 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6021 return;
6022 case Intrinsic::aarch64_sve_zipq_x2:
6023 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6024 AArch64::ZIP_VG2_2ZZZ_Q);
6025 return;
6026 case Intrinsic::aarch64_sve_zip_x4:
6027 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6028 Node->getValueType(0),
6029 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6030 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6031 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6032 return;
6033 case Intrinsic::aarch64_sve_zipq_x4:
6034 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6035 AArch64::ZIP_VG4_4Z4Z_Q);
6036 return;
6037 case Intrinsic::aarch64_sve_uzp_x2:
6038 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6039 Node->getValueType(0),
6040 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6041 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6042 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6043 return;
6044 case Intrinsic::aarch64_sve_uzpq_x2:
6045 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6046 AArch64::UZP_VG2_2ZZZ_Q);
6047 return;
6048 case Intrinsic::aarch64_sve_uzp_x4:
6049 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6050 Node->getValueType(0),
6051 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6052 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6053 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6054 return;
6055 case Intrinsic::aarch64_sve_uzpq_x4:
6056 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6057 AArch64::UZP_VG4_4Z4Z_Q);
6058 return;
6059 case Intrinsic::aarch64_sve_sel_x2:
6060 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6061 Node->getValueType(0),
6062 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6063 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6064 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6065 return;
6066 case Intrinsic::aarch64_sve_sel_x4:
6067 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6068 Node->getValueType(0),
6069 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6070 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6071 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6072 return;
6073 case Intrinsic::aarch64_sve_frinta_x2:
6074 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6075 return;
6076 case Intrinsic::aarch64_sve_frinta_x4:
6077 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6078 return;
6079 case Intrinsic::aarch64_sve_frintm_x2:
6080 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6081 return;
6082 case Intrinsic::aarch64_sve_frintm_x4:
6083 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6084 return;
6085 case Intrinsic::aarch64_sve_frintn_x2:
6086 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6087 return;
6088 case Intrinsic::aarch64_sve_frintn_x4:
6089 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6090 return;
6091 case Intrinsic::aarch64_sve_frintp_x2:
6092 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6093 return;
6094 case Intrinsic::aarch64_sve_frintp_x4:
6095 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6096 return;
6097 case Intrinsic::aarch64_sve_sunpk_x2:
6098 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6099 Node->getValueType(0),
6100 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6101 AArch64::SUNPK_VG2_2ZZ_D}))
6102 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_uunpk_x2:
6105 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6106 Node->getValueType(0),
6107 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6108 AArch64::UUNPK_VG2_2ZZ_D}))
6109 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6110 return;
6111 case Intrinsic::aarch64_sve_sunpk_x4:
6112 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6113 Node->getValueType(0),
6114 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6115 AArch64::SUNPK_VG4_4Z2Z_D}))
6116 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6117 return;
6118 case Intrinsic::aarch64_sve_uunpk_x4:
6119 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6120 Node->getValueType(0),
6121 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6122 AArch64::UUNPK_VG4_4Z2Z_D}))
6123 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6124 return;
6125 case Intrinsic::aarch64_sve_pext_x2: {
6126 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6127 Node->getValueType(0),
6128 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6129 AArch64::PEXT_2PCI_D}))
6130 SelectPExtPair(Node, Op);
6131 return;
6132 }
6133 }
6134 break;
6135 }
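    // Store-like intrinsics produce no results; when present, operand 2 supplies the stored value type used for selection.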
6136 case ISD::INTRINSIC_VOID: {
6137 unsigned IntNo = Node->getConstantOperandVal(1);
6138 if (Node->getNumOperands() >= 3)
6139 VT = Node->getOperand(2)->getValueType(0);
6140 switch (IntNo) {
6141 default:
6142 break;
6143 case Intrinsic::aarch64_neon_st1x2: {
6144 if (VT == MVT::v8i8) {
6145 SelectStore(Node, 2, AArch64::ST1Twov8b);
6146 return;
6147 } else if (VT == MVT::v16i8) {
6148 SelectStore(Node, 2, AArch64::ST1Twov16b);
6149 return;
6150 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6151 VT == MVT::v4bf16) {
6152 SelectStore(Node, 2, AArch64::ST1Twov4h);
6153 return;
6154 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6155 VT == MVT::v8bf16) {
6156 SelectStore(Node, 2, AArch64::ST1Twov8h);
6157 return;
6158 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6159 SelectStore(Node, 2, AArch64::ST1Twov2s);
6160 return;
6161 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6162 SelectStore(Node, 2, AArch64::ST1Twov4s);
6163 return;
6164 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6165 SelectStore(Node, 2, AArch64::ST1Twov2d);
6166 return;
6167 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6168 SelectStore(Node, 2, AArch64::ST1Twov1d);
6169 return;
6170 }
6171 break;
6172 }
6173 case Intrinsic::aarch64_neon_st1x3: {
6174 if (VT == MVT::v8i8) {
6175 SelectStore(Node, 3, AArch64::ST1Threev8b);
6176 return;
6177 } else if (VT == MVT::v16i8) {
6178 SelectStore(Node, 3, AArch64::ST1Threev16b);
6179 return;
6180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6181 VT == MVT::v4bf16) {
6182 SelectStore(Node, 3, AArch64::ST1Threev4h);
6183 return;
6184 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6185 VT == MVT::v8bf16) {
6186 SelectStore(Node, 3, AArch64::ST1Threev8h);
6187 return;
6188 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6189 SelectStore(Node, 3, AArch64::ST1Threev2s);
6190 return;
6191 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6192 SelectStore(Node, 3, AArch64::ST1Threev4s);
6193 return;
6194 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6195 SelectStore(Node, 3, AArch64::ST1Threev2d);
6196 return;
6197 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6198 SelectStore(Node, 3, AArch64::ST1Threev1d);
6199 return;
6200 }
6201 break;
6202 }
6203 case Intrinsic::aarch64_neon_st1x4: {
6204 if (VT == MVT::v8i8) {
6205 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6206 return;
6207 } else if (VT == MVT::v16i8) {
6208 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6209 return;
6210 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6211 VT == MVT::v4bf16) {
6212 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6213 return;
6214 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6215 VT == MVT::v8bf16) {
6216 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6217 return;
6218 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6219 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6220 return;
6221 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6222 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6223 return;
6224 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6225 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6226 return;
6227 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6228 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6229 return;
6230 }
6231 break;
6232 }
6233 case Intrinsic::aarch64_neon_st2: {
6234 if (VT == MVT::v8i8) {
6235 SelectStore(Node, 2, AArch64::ST2Twov8b);
6236 return;
6237 } else if (VT == MVT::v16i8) {
6238 SelectStore(Node, 2, AArch64::ST2Twov16b);
6239 return;
6240 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6241 VT == MVT::v4bf16) {
6242 SelectStore(Node, 2, AArch64::ST2Twov4h);
6243 return;
6244 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6245 VT == MVT::v8bf16) {
6246 SelectStore(Node, 2, AArch64::ST2Twov8h);
6247 return;
6248 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6249 SelectStore(Node, 2, AArch64::ST2Twov2s);
6250 return;
6251 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6252 SelectStore(Node, 2, AArch64::ST2Twov4s);
6253 return;
6254 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6255 SelectStore(Node, 2, AArch64::ST2Twov2d);
6256 return;
6257 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6258 SelectStore(Node, 2, AArch64::ST1Twov1d);
6259 return;
6260 }
6261 break;
6262 }
6263 case Intrinsic::aarch64_neon_st3: {
6264 if (VT == MVT::v8i8) {
6265 SelectStore(Node, 3, AArch64::ST3Threev8b);
6266 return;
6267 } else if (VT == MVT::v16i8) {
6268 SelectStore(Node, 3, AArch64::ST3Threev16b);
6269 return;
6270 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6271 VT == MVT::v4bf16) {
6272 SelectStore(Node, 3, AArch64::ST3Threev4h);
6273 return;
6274 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6275 VT == MVT::v8bf16) {
6276 SelectStore(Node, 3, AArch64::ST3Threev8h);
6277 return;
6278 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6279 SelectStore(Node, 3, AArch64::ST3Threev2s);
6280 return;
6281 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6282 SelectStore(Node, 3, AArch64::ST3Threev4s);
6283 return;
6284 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6285 SelectStore(Node, 3, AArch64::ST3Threev2d);
6286 return;
6287 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6288 SelectStore(Node, 3, AArch64::ST1Threev1d);
6289 return;
6290 }
6291 break;
6292 }
6293 case Intrinsic::aarch64_neon_st4: {
6294 if (VT == MVT::v8i8) {
6295 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6296 return;
6297 } else if (VT == MVT::v16i8) {
6298 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6299 return;
6300 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6301 VT == MVT::v4bf16) {
6302 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6303 return;
6304 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6305 VT == MVT::v8bf16) {
6306 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6307 return;
6308 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6309 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6310 return;
6311 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6312 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6313 return;
6314 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6315 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6316 return;
6317 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6318 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6319 return;
6320 }
6321 break;
6322 }
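 // The st2lane/st3lane/st4lane intrinsics store one lane from each source
 // register, so only the element size matters and the 64-bit and 128-bit
 // vector types share a case below.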
6323 case Intrinsic::aarch64_neon_st2lane: {
6324 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6325 SelectStoreLane(Node, 2, AArch64::ST2i8);
6326 return;
6327 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6328 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6329 SelectStoreLane(Node, 2, AArch64::ST2i16);
6330 return;
6331 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6332 VT == MVT::v2f32) {
6333 SelectStoreLane(Node, 2, AArch64::ST2i32);
6334 return;
6335 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6336 VT == MVT::v1f64) {
6337 SelectStoreLane(Node, 2, AArch64::ST2i64);
6338 return;
6339 }
6340 break;
6341 }
6342 case Intrinsic::aarch64_neon_st3lane: {
6343 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6344 SelectStoreLane(Node, 3, AArch64::ST3i8);
6345 return;
6346 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6347 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6348 SelectStoreLane(Node, 3, AArch64::ST3i16);
6349 return;
6350 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6351 VT == MVT::v2f32) {
6352 SelectStoreLane(Node, 3, AArch64::ST3i32);
6353 return;
6354 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6355 VT == MVT::v1f64) {
6356 SelectStoreLane(Node, 3, AArch64::ST3i64);
6357 return;
6358 }
6359 break;
6360 }
6361 case Intrinsic::aarch64_neon_st4lane: {
6362 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6363 SelectStoreLane(Node, 4, AArch64::ST4i8);
6364 return;
6365 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6366 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6367 SelectStoreLane(Node, 4, AArch64::ST4i16);
6368 return;
6369 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6370 VT == MVT::v2f32) {
6371 SelectStoreLane(Node, 4, AArch64::ST4i32);
6372 return;
6373 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6374 VT == MVT::v1f64) {
6375 SelectStoreLane(Node, 4, AArch64::ST4i64);
6376 return;
6377 }
6378 break;
6379 }
6380 case Intrinsic::aarch64_sve_st2q: {
6381 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6382 return;
6383 }
6384 case Intrinsic::aarch64_sve_st3q: {
6385 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6386 return;
6387 }
6388 case Intrinsic::aarch64_sve_st4q: {
6389 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6390 return;
6391 }
6392 case Intrinsic::aarch64_sve_st2: {
6393 if (VT == MVT::nxv16i8) {
6394 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6395 return;
6396 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6397 VT == MVT::nxv8bf16) {
6398 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6399 return;
6400 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6401 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6402 return;
6403 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6404 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6405 return;
6406 }
6407 break;
6408 }
6409 case Intrinsic::aarch64_sve_st3: {
6410 if (VT == MVT::nxv16i8) {
6411 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6412 return;
6413 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6414 VT == MVT::nxv8bf16) {
6415 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6416 return;
6417 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6418 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6419 return;
6420 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6421 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6422 return;
6423 }
6424 break;
6425 }
6426 case Intrinsic::aarch64_sve_st4: {
6427 if (VT == MVT::nxv16i8) {
6428 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6429 return;
6430 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6431 VT == MVT::nxv8bf16) {
6432 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6433 return;
6434 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6435 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6436 return;
6437 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6438 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6439 return;
6440 }
6441 break;
6442 }
6443 }
6444 break;
6445 }
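 // The AArch64ISD::*post nodes below are post-incremented (writeback) forms,
 // created by merging a NEON load/store with the subsequent update of its
 // base pointer; they select the corresponding *_POST instructions.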
6446 case AArch64ISD::LD2post: {
6447 if (VT == MVT::v8i8) {
6448 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6449 return;
6450 } else if (VT == MVT::v16i8) {
6451 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6452 return;
6453 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6454 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6455 return;
6456 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6457 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6458 return;
6459 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6460 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6461 return;
6462 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6463 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6464 return;
6465 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6466 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6467 return;
6468 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6469 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6470 return;
6471 }
6472 break;
6473 }
6474 case AArch64ISD::LD3post: {
6475 if (VT == MVT::v8i8) {
6476 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6477 return;
6478 } else if (VT == MVT::v16i8) {
6479 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6480 return;
6481 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6482 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6483 return;
6484 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6485 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6486 return;
6487 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6488 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6489 return;
6490 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6491 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6492 return;
6493 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6494 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6495 return;
6496 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6497 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6498 return;
6499 }
6500 break;
6501 }
6502 case AArch64ISD::LD4post: {
6503 if (VT == MVT::v8i8) {
6504 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6505 return;
6506 } else if (VT == MVT::v16i8) {
6507 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6508 return;
6509 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6510 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6511 return;
6512 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6513 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6514 return;
6515 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6516 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6517 return;
6518 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6519 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6520 return;
6521 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6522 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6523 return;
6524 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6525 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6526 return;
6527 }
6528 break;
6529 }
6530 case AArch64ISD::LD1x2post: {
6531 if (VT == MVT::v8i8) {
6532 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6533 return;
6534 } else if (VT == MVT::v16i8) {
6535 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6536 return;
6537 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6538 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6539 return;
6540 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6541 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6542 return;
6543 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6544 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6545 return;
6546 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6547 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6548 return;
6549 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6550 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6551 return;
6552 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6553 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6554 return;
6555 }
6556 break;
6557 }
6558 case AArch64ISD::LD1x3post: {
6559 if (VT == MVT::v8i8) {
6560 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6561 return;
6562 } else if (VT == MVT::v16i8) {
6563 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6564 return;
6565 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6566 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6567 return;
6568 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6569 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6570 return;
6571 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6572 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6573 return;
6574 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6575 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6576 return;
6577 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6578 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6579 return;
6580 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6581 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6582 return;
6583 }
6584 break;
6585 }
6586 case AArch64ISD::LD1x4post: {
6587 if (VT == MVT::v8i8) {
6588 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6589 return;
6590 } else if (VT == MVT::v16i8) {
6591 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6592 return;
6593 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6594 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6595 return;
6596 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6597 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6598 return;
6599 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6600 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6601 return;
6602 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6603 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6604 return;
6605 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6606 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6607 return;
6608 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6609 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6610 return;
6611 }
6612 break;
6613 }
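 // The LD1DUPpost..LD4DUPpost cases below select the post-indexed LD1R..LD4R
 // instructions, which load a single structure and replicate it to all lanes
 // of the destination vector(s).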
6614 case AArch64ISD::LD1DUPpost: {
6615 if (VT == MVT::v8i8) {
6616 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6617 return;
6618 } else if (VT == MVT::v16i8) {
6619 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6620 return;
6621 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6622 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6623 return;
6624 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6625 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6626 return;
6627 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6628 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6629 return;
6630 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6631 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6632 return;
6633 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6634 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6635 return;
6636 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6637 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6638 return;
6639 }
6640 break;
6641 }
6642 case AArch64ISD::LD2DUPpost: {
6643 if (VT == MVT::v8i8) {
6644 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6645 return;
6646 } else if (VT == MVT::v16i8) {
6647 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6648 return;
6649 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6650 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6651 return;
6652 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6653 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6654 return;
6655 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6656 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6657 return;
6658 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6659 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6660 return;
6661 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6662 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6663 return;
6664 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6665 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6666 return;
6667 }
6668 break;
6669 }
6670 case AArch64ISD::LD3DUPpost: {
6671 if (VT == MVT::v8i8) {
6672 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6673 return;
6674 } else if (VT == MVT::v16i8) {
6675 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6676 return;
6677 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6678 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6679 return;
6680 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6681 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6682 return;
6683 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6684 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6685 return;
6686 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6687 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6688 return;
6689 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6690 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6691 return;
6692 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6693 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6694 return;
6695 }
6696 break;
6697 }
6698 case AArch64ISD::LD4DUPpost: {
6699 if (VT == MVT::v8i8) {
6700 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6701 return;
6702 } else if (VT == MVT::v16i8) {
6703 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6704 return;
6705 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6706 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6707 return;
6708 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6709 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6710 return;
6711 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6712 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6713 return;
6714 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6715 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6716 return;
6717 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6718 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6719 return;
6720 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6721 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6722 return;
6723 }
6724 break;
6725 }
6726 case AArch64ISD::LD1LANEpost: {
6727 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6728 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6729 return;
6730 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6731 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6732 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6733 return;
6734 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6735 VT == MVT::v2f32) {
6736 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6737 return;
6738 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6739 VT == MVT::v1f64) {
6740 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6741 return;
6742 }
6743 break;
6744 }
6745 case AArch64ISD::LD2LANEpost: {
6746 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6747 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6748 return;
6749 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6750 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6751 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6752 return;
6753 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6754 VT == MVT::v2f32) {
6755 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6756 return;
6757 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6758 VT == MVT::v1f64) {
6759 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6760 return;
6761 }
6762 break;
6763 }
6764 case AArch64ISD::LD3LANEpost: {
6765 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6766 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6767 return;
6768 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6769 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6770 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6771 return;
6772 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6773 VT == MVT::v2f32) {
6774 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6775 return;
6776 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6777 VT == MVT::v1f64) {
6778 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6779 return;
6780 }
6781 break;
6782 }
6783 case AArch64ISD::LD4LANEpost: {
6784 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6785 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6786 return;
6787 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6788 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6789 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6790 return;
6791 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6792 VT == MVT::v2f32) {
6793 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6794 return;
6795 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6796 VT == MVT::v1f64) {
6797 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6798 return;
6799 }
6800 break;
6801 }
6802 case AArch64ISD::ST2post: {
6803 VT = Node->getOperand(1).getValueType();
6804 if (VT == MVT::v8i8) {
6805 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6806 return;
6807 } else if (VT == MVT::v16i8) {
6808 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6809 return;
6810 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6811 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6812 return;
6813 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6814 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6815 return;
6816 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6817 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6818 return;
6819 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6820 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6821 return;
6822 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6823 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6824 return;
6825 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6826 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6827 return;
6828 }
6829 break;
6830 }
6831 case AArch64ISD::ST3post: {
6832 VT = Node->getOperand(1).getValueType();
6833 if (VT == MVT::v8i8) {
6834 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6835 return;
6836 } else if (VT == MVT::v16i8) {
6837 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6838 return;
6839 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6840 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6841 return;
6842 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6843 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6844 return;
6845 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6846 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6847 return;
6848 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6849 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6850 return;
6851 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6852 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6853 return;
6854 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6855 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6856 return;
6857 }
6858 break;
6859 }
6860 case AArch64ISD::ST4post: {
6861 VT = Node->getOperand(1).getValueType();
6862 if (VT == MVT::v8i8) {
6863 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6864 return;
6865 } else if (VT == MVT::v16i8) {
6866 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6867 return;
6868 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6869 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6870 return;
6871 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6872 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6873 return;
6874 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6875 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6876 return;
6877 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6878 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6879 return;
6880 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6881 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6882 return;
6883 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6884 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6885 return;
6886 }
6887 break;
6888 }
6889 case AArch64ISD::ST1x2post: {
6890 VT = Node->getOperand(1).getValueType();
6891 if (VT == MVT::v8i8) {
6892 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6893 return;
6894 } else if (VT == MVT::v16i8) {
6895 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6896 return;
6897 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6898 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6899 return;
6900 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6901 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6902 return;
6903 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6904 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6905 return;
6906 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6907 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6908 return;
6909 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6910 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6911 return;
6912 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6913 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6914 return;
6915 }
6916 break;
6917 }
6918 case AArch64ISD::ST1x3post: {
6919 VT = Node->getOperand(1).getValueType();
6920 if (VT == MVT::v8i8) {
6921 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6922 return;
6923 } else if (VT == MVT::v16i8) {
6924 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6925 return;
6926 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6927 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6928 return;
6929 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6930 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6931 return;
6932 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6933 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6934 return;
6935 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6936 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6937 return;
6938 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6939 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6940 return;
6941 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6942 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6943 return;
6944 }
6945 break;
6946 }
6947 case AArch64ISD::ST1x4post: {
6948 VT = Node->getOperand(1).getValueType();
6949 if (VT == MVT::v8i8) {
6950 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6951 return;
6952 } else if (VT == MVT::v16i8) {
6953 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6954 return;
6955 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6956 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6957 return;
6958 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6959 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6960 return;
6961 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6962 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6963 return;
6964 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6965 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6966 return;
6967 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6968 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6969 return;
6970 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6971 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6972 return;
6973 }
6974 break;
6975 }
6976 case AArch64ISD::ST2LANEpost: {
6977 VT = Node->getOperand(1).getValueType();
6978 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6979 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6980 return;
6981 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6982 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6983 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6984 return;
6985 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6986 VT == MVT::v2f32) {
6987 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6988 return;
6989 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6990 VT == MVT::v1f64) {
6991 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6992 return;
6993 }
6994 break;
6995 }
6996 case AArch64ISD::ST3LANEpost: {
6997 VT = Node->getOperand(1).getValueType();
6998 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6999 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7000 return;
7001 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7002 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7003 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7004 return;
7005 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7006 VT == MVT::v2f32) {
7007 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7008 return;
7009 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7010 VT == MVT::v1f64) {
7011 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7012 return;
7013 }
7014 break;
7015 }
7016 case AArch64ISD::ST4LANEpost: {
7017 VT = Node->getOperand(1).getValueType();
7018 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7019 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7020 return;
7021 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7022 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7023 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7024 return;
7025 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7026 VT == MVT::v2f32) {
7027 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7028 return;
7029 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7030 VT == MVT::v1f64) {
7031 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7032 return;
7033 }
7034 break;
7035 }
7036 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7037 if (VT == MVT::nxv16i8) {
7038 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7039 return;
7040 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7041 VT == MVT::nxv8bf16) {
7042 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7043 return;
7044 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7045 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7046 return;
7047 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7048 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7049 return;
7050 }
7051 break;
7052 }
7053 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7054 if (VT == MVT::nxv16i8) {
7055 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7056 return;
7057 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7058 VT == MVT::nxv8bf16) {
7059 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7060 return;
7061 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7062 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7063 return;
7064 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7065 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7066 return;
7067 }
7068 break;
7069 }
7070 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7071 if (VT == MVT::nxv16i8) {
7072 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7073 return;
7074 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7075 VT == MVT::nxv8bf16) {
7076 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7077 return;
7078 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7079 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7080 return;
7081 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7082 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7083 return;
7084 }
7085 break;
7086 }
7087 }
7088
7089 // Select the default instruction
7090 SelectCode(Node);
7091}
7092
7093/// createAArch64ISelDag - This pass converts a legalized DAG into an
7094/// AArch64-specific DAG, ready for instruction scheduling.
7095FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7096 CodeGenOptLevel OptLevel) {
7097 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7098}
7099
7100/// When \p PredVT is a scalable vector predicate in the form
7101/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7102/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7103/// structured vectors (NumVec > 1), the output data type is
7104/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7105/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7106/// EVT.
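/// For example, PredVT == MVT::nxv4i1 implies 32-bit elements (128 / 4), so
/// NumVec == 1 yields MVT::nxv4i32 and NumVec == 2 yields MVT::nxv8i32.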
7107static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7108 unsigned NumVec) {
7109 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7110 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7111 return EVT();
7112
7113 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7114 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7115 return EVT();
7116
7117 ElementCount EC = PredVT.getVectorElementCount();
7118 EVT ScalarVT =
7119 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7120 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7121
7122 return MemVT;
7123}
7124
7125/// Return the EVT of the data associated to a memory operation in \p
7126/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
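/// For example, a @llvm.aarch64.sve.ld2.sret load governed by an nxv4i1
/// predicate is treated as a memory access of type nxv8i32 (see the intrinsic
/// cases below).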
7127static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7128 if (isa<MemSDNode>(Root))
7129 return cast<MemSDNode>(Root)->getMemoryVT();
7130
7131 if (isa<MemIntrinsicSDNode>(Root))
7132 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7133
7134 const unsigned Opcode = Root->getOpcode();
7135 // For custom ISD nodes, we have to look at them individually to extract the
7136 // type of the data moved to/from memory.
7137 switch (Opcode) {
7138 case AArch64ISD::LD1_MERGE_ZERO:
7139 case AArch64ISD::LD1S_MERGE_ZERO:
7140 case AArch64ISD::LDNF1_MERGE_ZERO:
7141 case AArch64ISD::LDNF1S_MERGE_ZERO:
7142 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7143 case AArch64ISD::ST1_PRED:
7144 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7145 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7146 return getPackedVectorTypeFromPredicateType(
7147 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7148 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7149 return getPackedVectorTypeFromPredicateType(
7150 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7151 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7152 return getPackedVectorTypeFromPredicateType(
7153 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7154 default:
7155 break;
7156 }
7157
7158 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7159 return EVT();
7160
7161 switch (Root->getConstantOperandVal(1)) {
7162 default:
7163 return EVT();
7164 case Intrinsic::aarch64_sme_ldr:
7165 case Intrinsic::aarch64_sme_str:
7166 return MVT::nxv16i8;
7167 case Intrinsic::aarch64_sve_prf:
7168 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7169 // width of the predicate.
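 // (e.g. an nxv4i1 governing predicate implies 32-bit elements, MVT::nxv4i32).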
7170 return getPackedVectorTypeFromPredicateType(
7171 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7172 case Intrinsic::aarch64_sve_ld2_sret:
7173 case Intrinsic::aarch64_sve_ld2q_sret:
7174 return getPackedVectorTypeFromPredicateType(
7175 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7176 case Intrinsic::aarch64_sve_st2q:
7177 return getPackedVectorTypeFromPredicateType(
7178 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7179 case Intrinsic::aarch64_sve_ld3_sret:
7180 case Intrinsic::aarch64_sve_ld3q_sret:
7181 return getPackedVectorTypeFromPredicateType(
7182 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7183 case Intrinsic::aarch64_sve_st3q:
7184 return getPackedVectorTypeFromPredicateType(
7185 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7186 case Intrinsic::aarch64_sve_ld4_sret:
7187 case Intrinsic::aarch64_sve_ld4q_sret:
7188 return getPackedVectorTypeFromPredicateType(
7189 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7190 case Intrinsic::aarch64_sve_st4q:
7191 return getPackedVectorTypeFromPredicateType(
7192 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7193 case Intrinsic::aarch64_sve_ld1udq:
7194 case Intrinsic::aarch64_sve_st1dq:
7195 return EVT(MVT::nxv1i64);
7196 case Intrinsic::aarch64_sve_ld1uwq:
7197 case Intrinsic::aarch64_sve_st1wq:
7198 return EVT(MVT::nxv1i32);
7199 }
7200}
7201
7202/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7203/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7204/// where Root is the memory access using N for its address.
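/// For example, with MemVT == nxv16i8 (16-byte minimum block size) an address
/// of the form (add Base, (vscale * 32)) folds to Base with OffImm == 2,
/// provided 2 lies within [Min, Max].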
7205template <int64_t Min, int64_t Max>
7206bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7207 SDValue &Base,
7208 SDValue &OffImm) {
7209 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7210 const DataLayout &DL = CurDAG->getDataLayout();
7211 const MachineFrameInfo &MFI = MF->getFrameInfo();
7212
7213 if (N.getOpcode() == ISD::FrameIndex) {
7214 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7215 // We can only encode VL scaled offsets, so only fold in frame indexes
7216 // referencing SVE objects.
7217 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7218 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7219 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7220 return true;
7221 }
7222
7223 return false;
7224 }
7225
7226 if (MemVT == EVT())
7227 return false;
7228
7229 if (N.getOpcode() != ISD::ADD)
7230 return false;
7231
7232 SDValue VScale = N.getOperand(1);
7233 if (VScale.getOpcode() != ISD::VSCALE)
7234 return false;
7235
7236 TypeSize TS = MemVT.getSizeInBits();
7237 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7238 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7239
7240 if ((MulImm % MemWidthBytes) != 0)
7241 return false;
7242
7243 int64_t Offset = MulImm / MemWidthBytes;
7244 if (Offset < Min || Offset > Max)
7245 return false;
7246
7247 Base = N.getOperand(0);
7248 if (Base.getOpcode() == ISD::FrameIndex) {
7249 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7250 // We can only encode VL scaled offsets, so only fold in frame indexes
7251 // referencing SVE objects.
7252 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7253 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7254 }
7255
7256 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7257 return true;
7258}
7259
7260/// Select register plus register addressing mode for SVE, with scaled
7261/// offset.
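/// For example, (add Base, (shl Idx, Scale)) selects the [Xn, Xm, lsl #Scale]
/// form, while a constant offset that is a multiple of the element size is
/// materialised into a register and used as the index operand.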
7262bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7263 SDValue &Base,
7264 SDValue &Offset) {
7265 if (N.getOpcode() != ISD::ADD)
7266 return false;
7267
7268 // Process an ADD node.
7269 const SDValue LHS = N.getOperand(0);
7270 const SDValue RHS = N.getOperand(1);
7271
7272 // 8-bit data does not come with the SHL node, so it is treated
7273 // separately.
7274 if (Scale == 0) {
7275 Base = LHS;
7276 Offset = RHS;
7277 return true;
7278 }
7279
7280 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7281 int64_t ImmOff = C->getSExtValue();
7282 unsigned Size = 1 << Scale;
7283
7284 // To use the reg+reg addressing mode, the immediate must be a multiple of
7285 // the vector element's byte size.
7286 if (ImmOff % Size)
7287 return false;
7288
7289 SDLoc DL(N);
7290 Base = LHS;
7291 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7292 SDValue Ops[] = {Offset};
7293 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7294 Offset = SDValue(MI, 0);
7295 return true;
7296 }
7297
7298 // Check if the RHS is a shift node with a constant.
7299 if (RHS.getOpcode() != ISD::SHL)
7300 return false;
7301
7302 const SDValue ShiftRHS = RHS.getOperand(1);
7303 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7304 if (C->getZExtValue() == Scale) {
7305 Base = LHS;
7306 Offset = RHS.getOperand(0);
7307 return true;
7308 }
7309
7310 return false;
7311}
7312
7313bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7314 const AArch64TargetLowering *TLI =
7315 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7316
7317 return TLI->isAllActivePredicate(*CurDAG, N);
7318}
7319
7320bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7321 EVT VT = N.getValueType();
7322 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7323}
7324
7325bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7326 SDValue &Base, SDValue &Offset,
7327 unsigned Scale) {
7328 // Try to untangle an ADD node into a 'reg + offset'
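 // (e.g. (add %base, 16) with Scale == 4 gives Base = %base, Offset = #4,
 // provided 16 <= MaxSize and 16 is a multiple of Scale).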
7329 if (N.getOpcode() == ISD::ADD)
7330 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7331 int64_t ImmOff = C->getSExtValue();
7332 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7333 Base = N.getOperand(0);
7334 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7335 return true;
7336 }
7337 }
7338
7339 // By default, just match reg + 0.
7340 Base = N;
7341 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7342 return true;
7343}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1399
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1627
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1555
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1412
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1309
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1305
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1397
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
@ Undef
Value of the register doesn't matter.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:285
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1935
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version).
Definition: MathExtras.h:273
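Taken together, these bit helpers are the usual building blocks for decoding a contiguous bit mask into a (start, width) pair. A self-contained sketch; decodeShiftedMask is a hypothetical helper, not something defined in this file:
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Returns true and fills Lsb/Width when Mask is a single contiguous run of
// ones, e.g. 0x0FF0 -> Lsb = 4, Width = 8.
static bool decodeShiftedMask(uint64_t Mask, unsigned &Lsb, unsigned &Width) {
  if (!llvm::isShiftedMask_64(Mask))
    return false;                    // reject empty or non-contiguous masks
  Lsb = llvm::countr_zero(Mask);     // trailing zeros locate the run's start
  Width = llvm::popcount(Mask);      // population count gives the run length
  return true;
}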
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG, ready for instruction scheduling.
@ And
Bitwise or logical AND of integers.
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
EVT
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:350
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:204
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:199
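A brief sketch exercising the EVT queries listed above (illustrative only; the function name is made up for this example):
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

static void evtQueriesExample(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  // A fixed 4 x i32 vector and a scalable <vscale x 2 x i64> vector.
  EVT V4i32 = EVT::getVectorVT(Ctx, MVT::i32, 4);
  EVT NxV2i64 = EVT::getVectorVT(Ctx, MVT::i64, 2, /*IsScalable=*/true);
  (void)V4i32.getFixedSizeInBits();        // 128
  (void)V4i32.is128BitVector();            // true
  (void)NxV2i64.isScalableVector();        // true
  (void)NxV2i64.getVectorElementType();    // i64
  (void)NxV2i64.getVectorMinNumElements(); // 2
}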
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40