1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 AArch64DAGToDAGISel() = delete;
48
49 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50 CodeGenOptLevel OptLevel)
51 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52
53 bool runOnMachineFunction(MachineFunction &MF) override {
54 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55 return SelectionDAGISel::runOnMachineFunction(MF);
56 }
57
58 void Select(SDNode *Node) override;
59
60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61 /// inline asm expressions.
62 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63 InlineAsm::ConstraintCode ConstraintID,
64 std::vector<SDValue> &OutOps) override;
65
66 template <signed Low, signed High, signed Scale>
67 bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74 return SelectShiftedRegister(N, false, Reg, Shift);
75 }
76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77 return SelectShiftedRegister(N, true, Reg, Shift);
78 }
79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81 }
82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84 }
85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87 }
88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93 }
94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96 }
97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed(N, 1, Base, OffImm);
102 }
103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexed(N, 2, Base, OffImm);
105 }
106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexed(N, 4, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 8, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 16, Base, OffImm);
114 }
115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117 }
118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120 }
121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129 }
130 template <unsigned Size, unsigned Max>
131 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132 // Test if there is an appropriate addressing mode and check if the
133 // immediate fits.
134 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135 if (Found) {
136 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137 int64_t C = CI->getSExtValue();
138 if (C <= Max)
139 return true;
140 }
141 }
142
143 // Otherwise, base only, materialize address in register.
144 Base = N;
145 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146 return true;
147 }
148
149 template<int Width>
150 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151 SDValue &SignExtend, SDValue &DoShift) {
152 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153 }
154
155 template<int Width>
156 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157 SDValue &SignExtend, SDValue &DoShift) {
158 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159 }
160
161 bool SelectExtractHigh(SDValue N, SDValue &Res) {
162 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163 N = N->getOperand(0);
164 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165 !isa<ConstantSDNode>(N->getOperand(1)))
166 return false;
167 EVT VT = N->getValueType(0);
168 EVT LVT = N->getOperand(0).getValueType();
169 unsigned Index = N->getConstantOperandVal(1);
170 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
171 Index != VT.getVectorNumElements())
172 return false;
173 Res = N->getOperand(0);
174 return true;
175 }
176
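 // Matches a "rounding" vector shift right: VLSHR(ADD(X, 1 << (ShtAmt - 1)),
 // ShtAmt), i.e. an add of the rounding constant followed by the shift.
 // Returns the un-rounded operand in Res1 and the shift amount in Res2.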
177 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178 if (N.getOpcode() != AArch64ISD::VLSHR)
179 return false;
180 SDValue Op = N->getOperand(0);
181 EVT VT = Op.getValueType();
182 unsigned ShtAmt = N->getConstantOperandVal(1);
183 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184 return false;
185
186 APInt Imm;
187 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188 Imm = APInt(VT.getScalarSizeInBits(),
189 Op.getOperand(1).getConstantOperandVal(0)
190 << Op.getOperand(1).getConstantOperandVal(1));
191 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0));
195 else
196 return false;
197
198 if (Imm != 1ULL << (ShtAmt - 1))
199 return false;
200
201 Res1 = Op.getOperand(0);
202 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203 return true;
204 }
205
206 bool SelectDupZeroOrUndef(SDValue N) {
207 switch(N->getOpcode()) {
208 case ISD::UNDEF:
209 return true;
210 case AArch64ISD::DUP:
211 case ISD::SPLAT_VECTOR: {
212 auto Opnd0 = N->getOperand(0);
213 if (isNullConstant(Opnd0))
214 return true;
215 if (isNullFPConstant(Opnd0))
216 return true;
217 break;
218 }
219 default:
220 break;
221 }
222
223 return false;
224 }
225
226 bool SelectDupZero(SDValue N) {
227 switch(N->getOpcode()) {
228 case AArch64ISD::DUP:
229 case ISD::SPLAT_VECTOR: {
230 auto Opnd0 = N->getOperand(0);
231 if (isNullConstant(Opnd0))
232 return true;
233 if (isNullFPConstant(Opnd0))
234 return true;
235 break;
236 }
237 }
238
239 return false;
240 }
241
242 bool SelectDupNegativeZero(SDValue N) {
243 switch(N->getOpcode()) {
244 case AArch64ISD::DUP:
245 case ISD::SPLAT_VECTOR: {
246 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
247 return Const && Const->isZero() && Const->isNegative();
248 }
249 }
250
251 return false;
252 }
253
254 template<MVT::SimpleValueType VT>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
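 // For example, with Min = 1, Max = 16, Scale = 16 and Shift = false, a
 // constant multiplier of 32 is accepted and emitted as Imm = 2.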
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 if (!isa<ConstantSDNode>(N))
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 if (!isa<ConstantSDNode>(N))
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348 /// between 1 and 4 elements. If it contains a single element, that is returned
349 /// unchanged; otherwise a REG_SEQUENCE value is returned.
350 SDValue createDTuple(ArrayRef<SDValue> Vecs);
351 SDValue createQTuple(ArrayRef<SDValue> Vecs);
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 bool trySelectStackSlotTagP(SDNode *N);
370 void SelectTagP(SDNode *N);
371
372 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
377 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
379 unsigned Opc_rr, unsigned Opc_ri,
380 bool IsIntr = false);
381 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
382 unsigned Scale, unsigned Opc_ri,
383 unsigned Opc_rr);
384 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
385 bool IsZmMulti, unsigned Opcode,
386 bool HasPred = false);
387 void SelectPExtPair(SDNode *N, unsigned Opc);
388 void SelectWhilePair(SDNode *N, unsigned Opc);
389 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
392 bool IsTupleInput, unsigned Opc);
393 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
394
395 template <unsigned MaxIdx, unsigned Scale>
396 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397 unsigned Op);
398 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
399 unsigned Op, unsigned MaxIdx, unsigned Scale,
400 unsigned BaseReg = 0);
401 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
402 /// SVE Reg+Imm addressing mode.
403 template <int64_t Min, int64_t Max>
404 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
405 SDValue &OffImm);
406 /// SVE Reg+Reg address mode.
407 template <unsigned Scale>
408 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
409 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
410 }
411
412 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
413 uint32_t MaxImm);
414
415 template <unsigned MaxIdx, unsigned Scale>
416 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
417 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
418 }
419
420 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
425 unsigned Opc_rr, unsigned Opc_ri);
426 std::tuple<unsigned, SDValue, SDValue>
427 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
428 const SDValue &OldBase, const SDValue &OldOffset,
429 unsigned Scale);
430
431 bool tryBitfieldExtractOp(SDNode *N);
432 bool tryBitfieldExtractOpFromSExt(SDNode *N);
433 bool tryBitfieldInsertOp(SDNode *N);
434 bool tryBitfieldInsertInZeroOp(SDNode *N);
435 bool tryShiftAmountMod(SDNode *N);
436
437 bool tryReadRegister(SDNode *N);
438 bool tryWriteRegister(SDNode *N);
439
440 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
441 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
442
443 bool trySelectXAR(SDNode *N);
444
445// Include the pieces autogenerated from the target description.
446#include "AArch64GenDAGISel.inc"
447
448private:
449 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
450 SDValue &Shift);
451 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
452 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm) {
454 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
455 }
456 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
457 unsigned Size, SDValue &Base,
458 SDValue &OffImm);
459 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm);
461 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &Offset, SDValue &SignExtend,
465 SDValue &DoShift);
466 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &Offset, SDValue &SignExtend,
468 SDValue &DoShift);
469 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
470 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
471 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
472 SDValue &Offset, SDValue &SignExtend);
473
474 template<unsigned RegWidth>
475 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
476 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
477 }
478
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
484 }
485
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
487 unsigned Width);
488
489 bool SelectCMP_SWAP(SDNode *N);
490
491 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493 bool Negate);
494 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
496
497 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
498 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
499 bool AllowSaturation, SDValue &Imm);
500
501 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
502 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
503 SDValue &Offset);
504 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
505 SDValue &Offset, unsigned Scale = 1);
506
507 bool SelectAllActivePredicate(SDValue N);
508 bool SelectAnyPredicate(SDValue N);
509};
510
511class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
512public:
513 static char ID;
514 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
515 CodeGenOptLevel OptLevel)
516 : SelectionDAGISelLegacy(
517 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
518};
519} // end anonymous namespace
520
521char AArch64DAGToDAGISelLegacy::ID = 0;
522
523INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
524
525/// isIntImmediate - This method tests to see if the node is a constant
526/// operand. If so, Imm will receive the 32-bit value.
527static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
528 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
529 Imm = C->getZExtValue();
530 return true;
531 }
532 return false;
533}
534
535// isIntImmediate - This method tests to see if N is a constant operand.
536// If so, Imm will receive the value.
537static bool isIntImmediate(SDValue N, uint64_t &Imm) {
538 return isIntImmediate(N.getNode(), Imm);
539}
540
541// isOpcWithIntImmediate - This method tests to see if the node is a specific
542// opcode and that it has an immediate integer right operand.
543// If so, Imm will receive the 32-bit value.
544static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
545 uint64_t &Imm) {
546 return N->getOpcode() == Opc &&
547 isIntImmediate(N->getOperand(1).getNode(), Imm);
548}
549
550// isIntImmediateEq - This method tests to see if N is a constant operand that
551// is equivalent to 'ImmExpected'.
552#ifndef NDEBUG
553static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
554 uint64_t Imm;
555 if (!isIntImmediate(N.getNode(), Imm))
556 return false;
557 return Imm == ImmExpected;
558}
559#endif
560
561bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
562 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
563 std::vector<SDValue> &OutOps) {
564 switch(ConstraintID) {
565 default:
566 llvm_unreachable("Unexpected asm memory constraint");
567 case InlineAsm::ConstraintCode::m:
568 case InlineAsm::ConstraintCode::o:
569 case InlineAsm::ConstraintCode::Q:
570 // We need to make sure that this one operand does not end up in XZR, thus
571 // require the address to be in a PointerRegClass register.
572 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
573 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
574 SDLoc dl(Op);
575 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
576 SDValue NewOp =
577 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
578 dl, Op.getValueType(),
579 Op, RC), 0);
580 OutOps.push_back(NewOp);
581 return false;
582 }
583 return true;
584}
585
586/// SelectArithImmed - Select an immediate value that can be represented as
587/// a 12-bit value shifted left by either 0 or 12. If so, return true with
588/// Val set to the 12-bit value and Shift set to the shifter operand.
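/// For example, 0x123 is accepted with LSL #0, 0x123000 (= 0x123 << 12) is
/// accepted as 0x123 with LSL #12, and 0x123456 is rejected because its low
/// 12 bits are non-zero.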
589bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
590 SDValue &Shift) {
591 // This function is called from the addsub_shifted_imm ComplexPattern,
592 // which lists [imm] as the list of opcodes it's interested in; however,
593 // we still need to check whether the operand is actually an immediate
594 // here because the ComplexPattern opcode list is only used in
595 // root-level opcode matching.
596 if (!isa<ConstantSDNode>(N.getNode()))
597 return false;
598
599 uint64_t Immed = N.getNode()->getAsZExtVal();
600 unsigned ShiftAmt;
601
602 if (Immed >> 12 == 0) {
603 ShiftAmt = 0;
604 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
605 ShiftAmt = 12;
606 Immed = Immed >> 12;
607 } else
608 return false;
609
610 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
611 SDLoc dl(N);
612 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
613 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
614 return true;
615}
616
617/// SelectNegArithImmed - As above, but negates the value before trying to
618/// select it.
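/// For example, an i32 immediate of -5 is negated to 5 and selected as Val = 5
/// with LSL #0; an immediate of 0 is rejected so that "cmp wN, #0" and
/// "cmn wN, #0" keep their distinct effects on the C flag.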
619bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
620 SDValue &Shift) {
621 // This function is called from the addsub_shifted_imm ComplexPattern,
622 // which lists [imm] as the list of opcodes it's interested in; however,
623 // we still need to check whether the operand is actually an immediate
624 // here because the ComplexPattern opcode list is only used in
625 // root-level opcode matching.
626 if (!isa<ConstantSDNode>(N.getNode()))
627 return false;
628
629 // The immediate operand must be a 24-bit zero-extended immediate.
630 uint64_t Immed = N.getNode()->getAsZExtVal();
631
632 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
633 // have the opposite effect on the C flag, so this pattern mustn't match under
634 // those circumstances.
635 if (Immed == 0)
636 return false;
637
638 if (N.getValueType() == MVT::i32)
639 Immed = ~((uint32_t)Immed) + 1;
640 else
641 Immed = ~Immed + 1ULL;
642 if (Immed & 0xFFFFFFFFFF000000ULL)
643 return false;
644
645 Immed &= 0xFFFFFFULL;
646 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
647 Shift);
648}
649
650/// getShiftTypeForNode - Translate a shift node to the corresponding
651/// ShiftType value.
652static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
653 switch (N.getOpcode()) {
654 default:
655 return AArch64_AM::InvalidShiftExtend;
656 case ISD::SHL:
657 return AArch64_AM::LSL;
658 case ISD::SRL:
659 return AArch64_AM::LSR;
660 case ISD::SRA:
661 return AArch64_AM::ASR;
662 case ISD::ROTR:
663 return AArch64_AM::ROR;
664 }
665}
666
667/// Determine whether it is worth it to fold SHL into the addressing
668/// mode.
669static bool isWorthFoldingSHL(SDValue V) {
670 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
671 // It is worth folding logical shift of up to three places.
672 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
673 if (!CSD)
674 return false;
675 unsigned ShiftVal = CSD->getZExtValue();
676 if (ShiftVal > 3)
677 return false;
678
679 // Check if this particular node is reused in any non-memory related
680 // operation. If yes, do not try to fold this node into the address
681 // computation, since the computation will be kept.
682 const SDNode *Node = V.getNode();
683 for (SDNode *UI : Node->uses())
684 if (!isa<MemSDNode>(*UI))
685 for (SDNode *UII : UI->uses())
686 if (!isa<MemSDNode>(*UII))
687 return false;
688 return true;
689}
690
691/// Determine whether it is worth folding V into an extended register addressing
692/// mode.
693bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
694 // Trivial if we are optimizing for code size or if there is only
695 // one use of the value.
696 if (CurDAG->shouldOptForSize() || V.hasOneUse())
697 return true;
698
699 // If a subtarget has a slow shift, folding a shift into multiple loads
700 // costs additional micro-ops.
701 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
702 return false;
703
704 // Check whether we're going to emit the address arithmetic anyway because
705 // it's used by a non-address operation.
706 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
707 return true;
708 if (V.getOpcode() == ISD::ADD) {
709 const SDValue LHS = V.getOperand(0);
710 const SDValue RHS = V.getOperand(1);
711 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
712 return true;
713 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
714 return true;
715 }
716
717 // It hurts otherwise, since the value will be reused.
718 return false;
719}
720
721/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
722/// to select more shifted-register operands.
723bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
724 SDValue &Shift) {
725 EVT VT = N.getValueType();
726 if (VT != MVT::i32 && VT != MVT::i64)
727 return false;
728
729 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
730 return false;
731 SDValue LHS = N.getOperand(0);
732 if (!LHS->hasOneUse())
733 return false;
734
735 unsigned LHSOpcode = LHS->getOpcode();
736 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
737 return false;
738
739 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
740 if (!ShiftAmtNode)
741 return false;
742
743 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
744 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
745 if (!RHSC)
746 return false;
747
748 APInt AndMask = RHSC->getAPIntValue();
749 unsigned LowZBits, MaskLen;
750 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
751 return false;
752
753 unsigned BitWidth = N.getValueSizeInBits();
754 SDLoc DL(LHS);
755 uint64_t NewShiftC;
756 unsigned NewShiftOp;
757 if (LHSOpcode == ISD::SHL) {
758 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
759 // BitWidth != LowZBits + MaskLen doesn't match the pattern
760 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
761 return false;
762
763 NewShiftC = LowZBits - ShiftAmtC;
764 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
765 } else {
766 if (LowZBits == 0)
767 return false;
768
769 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
770 NewShiftC = LowZBits + ShiftAmtC;
771 if (NewShiftC >= BitWidth)
772 return false;
773
774 // SRA needs all high bits
775 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
776 return false;
777
778 // SRL high bits can be 0 or 1
779 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
780 return false;
781
782 if (LHSOpcode == ISD::SRL)
783 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
784 else
785 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
786 }
787
788 assert(NewShiftC < BitWidth && "Invalid shift amount");
789 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
790 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
791 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
792 NewShiftAmt, BitWidthMinus1),
793 0);
794 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
795 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
796 return true;
797}
798
799/// getExtendTypeForNode - Translate an extend node to the corresponding
800/// ExtendType value.
801static AArch64_AM::ShiftExtendType
802getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
803 if (N.getOpcode() == ISD::SIGN_EXTEND ||
804 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
805 EVT SrcVT;
806 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
807 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
808 else
809 SrcVT = N.getOperand(0).getValueType();
810
811 if (!IsLoadStore && SrcVT == MVT::i8)
812 return AArch64_AM::SXTB;
813 else if (!IsLoadStore && SrcVT == MVT::i16)
814 return AArch64_AM::SXTH;
815 else if (SrcVT == MVT::i32)
816 return AArch64_AM::SXTW;
817 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
818
819 return AArch64_AM::InvalidShiftExtend;
820 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
821 N.getOpcode() == ISD::ANY_EXTEND) {
822 EVT SrcVT = N.getOperand(0).getValueType();
823 if (!IsLoadStore && SrcVT == MVT::i8)
824 return AArch64_AM::UXTB;
825 else if (!IsLoadStore && SrcVT == MVT::i16)
826 return AArch64_AM::UXTH;
827 else if (SrcVT == MVT::i32)
828 return AArch64_AM::UXTW;
829 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
830
831 return AArch64_AM::InvalidShiftExtend;
832 } else if (N.getOpcode() == ISD::AND) {
833 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
834 if (!CSD)
835 return AArch64_AM::InvalidShiftExtend;
836 uint64_t AndMask = CSD->getZExtValue();
837
838 switch (AndMask) {
839 default:
840 return AArch64_AM::InvalidShiftExtend;
841 case 0xFF:
842 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
843 case 0xFFFF:
844 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
845 case 0xFFFFFFFF:
846 return AArch64_AM::UXTW;
847 }
848 }
849
850 return AArch64_AM::InvalidShiftExtend;
851}
852
853/// Determine whether it is worth folding V into an extended register of an
854/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
855/// instruction, and the shift should be treated as worth folding even if it has
856/// multiple uses.
857bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
858 // Trivial if we are optimizing for code size or if there is only
859 // one use of the value.
860 if (CurDAG->shouldOptForSize() || V.hasOneUse())
861 return true;
862
863 // If a subtarget has a fastpath LSL we can fold a logical shift into
864 // the add/sub and save a cycle.
865 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
866 V.getConstantOperandVal(1) <= 4 &&
867 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
868 return true;
869
870 // It hurts otherwise, since the value will be reused.
871 return false;
872}
873
874/// SelectShiftedRegister - Select a "shifted register" operand. If the value
875/// is not shifted, set the Shift operand to default of "LSL 0". The logical
876/// instructions allow the shifted register to be rotated, but the arithmetic
877/// instructions do not. The AllowROR parameter specifies whether ROR is
878/// supported.
879bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
880 SDValue &Reg, SDValue &Shift) {
881 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
882 return true;
883
884 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
885 if (ShType == AArch64_AM::InvalidShiftExtend)
886 return false;
887 if (!AllowROR && ShType == AArch64_AM::ROR)
888 return false;
889
890 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
891 unsigned BitSize = N.getValueSizeInBits();
892 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
893 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
894
895 Reg = N.getOperand(0);
896 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
897 return isWorthFoldingALU(N, true);
898 }
899
900 return false;
901}
902
903/// Instructions that accept extend modifiers like UXTW expect the register
904/// being extended to be a GPR32, but the incoming DAG might be acting on a
905/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
906/// this is the case.
907static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
908 if (N.getValueType() == MVT::i32)
909 return N;
910
911 SDLoc dl(N);
912 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
913}
914
915// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
916template<signed Low, signed High, signed Scale>
917bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
918 if (!isa<ConstantSDNode>(N))
919 return false;
920
921 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
922 if ((MulImm % std::abs(Scale)) == 0) {
923 int64_t RDVLImm = MulImm / Scale;
924 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
925 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
926 return true;
927 }
928 }
929
930 return false;
931}
932
933/// SelectArithExtendedRegister - Select an "extended register" operand. This
934/// operand folds in an extend followed by an optional left shift.
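/// For example, (shl (and x1, 0xff), (i64 2)) can be selected as Reg = x1
/// (narrowed to its low 32 bits) with the extend/shift operand "UXTB #2",
/// provided the fold is judged worthwhile.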
935bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
936 SDValue &Shift) {
937 unsigned ShiftVal = 0;
938 AArch64_AM::ShiftExtendType Ext;
939
940 if (N.getOpcode() == ISD::SHL) {
941 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
942 if (!CSD)
943 return false;
944 ShiftVal = CSD->getZExtValue();
945 if (ShiftVal > 4)
946 return false;
947
948 Ext = getExtendTypeForNode(N.getOperand(0));
949 if (Ext == AArch64_AM::InvalidShiftExtend)
950 return false;
951
952 Reg = N.getOperand(0).getOperand(0);
953 } else {
954 Ext = getExtendTypeForNode(N);
955 if (Ext == AArch64_AM::InvalidShiftExtend)
956 return false;
957
958 Reg = N.getOperand(0);
959
960 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
961 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
962 auto isDef32 = [](SDValue N) {
963 unsigned Opc = N.getOpcode();
964 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
965 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
966 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
967 Opc != ISD::FREEZE;
968 };
969 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
970 isDef32(Reg))
971 return false;
972 }
973
974 // AArch64 mandates that the RHS of the operation must use the smallest
975 // register class that could contain the size being extended from. Thus,
976 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
977 // there might not be an actual 32-bit value in the program. We can
978 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
979 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
980 Reg = narrowIfNeeded(CurDAG, Reg);
981 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
982 MVT::i32);
983 return isWorthFoldingALU(N);
984}
985
986/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
987/// operand is referred to by instructions that have an SP operand.
988bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
989 SDValue &Shift) {
990 unsigned ShiftVal = 0;
991 AArch64_AM::ShiftExtendType Ext;
992
993 if (N.getOpcode() != ISD::SHL)
994 return false;
995
996 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
997 if (!CSD)
998 return false;
999 ShiftVal = CSD->getZExtValue();
1000 if (ShiftVal > 4)
1001 return false;
1002
1003 Ext = AArch64_AM::UXTX;
1004 Reg = N.getOperand(0);
1005 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1006 MVT::i32);
1007 return isWorthFoldingALU(N);
1008}
1009
1010/// If there's a use of this ADDlow that's not itself a load/store then we'll
1011/// need to create a real ADD instruction from it anyway and there's no point in
1012/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1013/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1014/// leads to duplicated ADRP instructions.
1015static bool isWorthFoldingADDlow(SDValue N) {
1016 for (auto *Use : N->uses()) {
1017 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1018 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1019 Use->getOpcode() != ISD::ATOMIC_STORE)
1020 return false;
1021
1022 // ldar and stlr have much more restrictive addressing modes (just a
1023 // register).
1024 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1025 return false;
1026 }
1027
1028 return true;
1029}
1030
1031/// Check if the immediate offset is valid as a scaled immediate.
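/// For example, with Size = 8 and Range = 0x1000, any multiple of 8 in
/// [0, 32760] is accepted, matching the unsigned scaled 12-bit immediate of a
/// 64-bit LDR/STR.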
1032static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1033 unsigned Size) {
1034 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1035 Offset < (Range << Log2_32(Size)))
1036 return true;
1037 return false;
1038}
1039
1040/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1041/// immediate" address. The "Size" argument is the size in bytes of the memory
1042/// reference, which determines the scale.
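/// For example, with IsSignedImm = true, BW = 7 and Size = 8, byte offsets
/// that are multiples of 8 in [-512, 504] are accepted, which corresponds to
/// the 7-bit signed scaled immediate used by 64-bit LDP/STP.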
1043bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1044 unsigned BW, unsigned Size,
1045 SDValue &Base,
1046 SDValue &OffImm) {
1047 SDLoc dl(N);
1048 const DataLayout &DL = CurDAG->getDataLayout();
1049 const TargetLowering *TLI = getTargetLowering();
1050 if (N.getOpcode() == ISD::FrameIndex) {
1051 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1052 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1053 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1054 return true;
1055 }
1056
1057 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1058 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1059 if (CurDAG->isBaseWithConstantOffset(N)) {
1060 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1061 if (IsSignedImm) {
1062 int64_t RHSC = RHS->getSExtValue();
1063 unsigned Scale = Log2_32(Size);
1064 int64_t Range = 0x1LL << (BW - 1);
1065
1066 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1067 RHSC < (Range << Scale)) {
1068 Base = N.getOperand(0);
1069 if (Base.getOpcode() == ISD::FrameIndex) {
1070 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1071 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1072 }
1073 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1074 return true;
1075 }
1076 } else {
1077 // unsigned Immediate
1078 uint64_t RHSC = RHS->getZExtValue();
1079 unsigned Scale = Log2_32(Size);
1080 uint64_t Range = 0x1ULL << BW;
1081
1082 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1083 Base = N.getOperand(0);
1084 if (Base.getOpcode() == ISD::FrameIndex) {
1085 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1086 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1087 }
1088 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1089 return true;
1090 }
1091 }
1092 }
1093 }
1094 // Base only. The address will be materialized into a register before
1095 // the memory is accessed.
1096 // add x0, Xbase, #offset
1097 // stp x1, x2, [x0]
1098 Base = N;
1099 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1100 return true;
1101}
1102
1103/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1104/// immediate" address. The "Size" argument is the size in bytes of the memory
1105/// reference, which determines the scale.
1106bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1107 SDValue &Base, SDValue &OffImm) {
1108 SDLoc dl(N);
1109 const DataLayout &DL = CurDAG->getDataLayout();
1110 const TargetLowering *TLI = getTargetLowering();
1111 if (N.getOpcode() == ISD::FrameIndex) {
1112 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1113 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1114 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1115 return true;
1116 }
1117
1118 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1119 GlobalAddressSDNode *GAN =
1120 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1121 Base = N.getOperand(0);
1122 OffImm = N.getOperand(1);
1123 if (!GAN)
1124 return true;
1125
1126 if (GAN->getOffset() % Size == 0 &&
1127 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1128 return true;
1129 }
1130
1131 if (CurDAG->isBaseWithConstantOffset(N)) {
1132 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1133 int64_t RHSC = (int64_t)RHS->getZExtValue();
1134 unsigned Scale = Log2_32(Size);
1135 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1136 Base = N.getOperand(0);
1137 if (Base.getOpcode() == ISD::FrameIndex) {
1138 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1139 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1140 }
1141 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1142 return true;
1143 }
1144 }
1145 }
1146
1147 // Before falling back to our general case, check if the unscaled
1148 // instructions can handle this. If so, that's preferable.
1149 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1150 return false;
1151
1152 // Base only. The address will be materialized into a register before
1153 // the memory is accessed.
1154 // add x0, Xbase, #offset
1155 // ldr x0, [x0]
1156 Base = N;
1157 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1158 return true;
1159}
1160
1161/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1162/// immediate" address. This should only match when there is an offset that
1163/// is not valid for a scaled immediate addressing mode. The "Size" argument
1164/// is the size in bytes of the memory reference, which is needed here to know
1165/// what is valid for a scaled immediate.
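/// For example, an 8-byte access at offset -16 or at offset 9 cannot use the
/// scaled 12-bit form and is matched here instead (LDUR/STUR-style addressing).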
1166bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1167 SDValue &Base,
1168 SDValue &OffImm) {
1169 if (!CurDAG->isBaseWithConstantOffset(N))
1170 return false;
1171 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1172 int64_t RHSC = RHS->getSExtValue();
1173 if (RHSC >= -256 && RHSC < 256) {
1174 Base = N.getOperand(0);
1175 if (Base.getOpcode() == ISD::FrameIndex) {
1176 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1177 const TargetLowering *TLI = getTargetLowering();
1178 Base = CurDAG->getTargetFrameIndex(
1179 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1180 }
1181 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1182 return true;
1183 }
1184 }
1185 return false;
1186}
1187
1188static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1189 SDLoc dl(N);
1190 SDValue ImpDef = SDValue(
1191 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1192 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1193 N);
1194}
1195
1196/// Check if the given SHL node (\p N) can be used to form an
1197/// extended register for an addressing mode.
1198bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1199 bool WantExtend, SDValue &Offset,
1200 SDValue &SignExtend) {
1201 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1202 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1203 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1204 return false;
1205
1206 SDLoc dl(N);
1207 if (WantExtend) {
1208 AArch64_AM::ShiftExtendType Ext =
1209 getExtendTypeForNode(N.getOperand(0), true);
1210 if (Ext == AArch64_AM::InvalidShiftExtend)
1211 return false;
1212
1213 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1214 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1215 MVT::i32);
1216 } else {
1217 Offset = N.getOperand(0);
1218 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1219 }
1220
1221 unsigned LegalShiftVal = Log2_32(Size);
1222 unsigned ShiftVal = CSD->getZExtValue();
1223
1224 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1225 return false;
1226
1227 return isWorthFoldingAddr(N, Size);
1228}
1229
1230bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1231 SDValue &Base, SDValue &Offset,
1232 SDValue &SignExtend,
1233 SDValue &DoShift) {
1234 if (N.getOpcode() != ISD::ADD)
1235 return false;
1236 SDValue LHS = N.getOperand(0);
1237 SDValue RHS = N.getOperand(1);
1238 SDLoc dl(N);
1239
1240 // We don't want to match immediate adds here, because they are better lowered
1241 // to the register-immediate addressing modes.
1242 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1243 return false;
1244
1245 // Check if this particular node is reused in any non-memory related
1246 // operation. If yes, do not try to fold this node into the address
1247 // computation, since the computation will be kept.
1248 const SDNode *Node = N.getNode();
1249 for (SDNode *UI : Node->uses()) {
1250 if (!isa<MemSDNode>(*UI))
1251 return false;
1252 }
1253
1254 // Remember if it is worth folding N when it produces extended register.
1255 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1256
1257 // Try to match a shifted extend on the RHS.
1258 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1259 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1260 Base = LHS;
1261 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1262 return true;
1263 }
1264
1265 // Try to match a shifted extend on the LHS.
1266 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1267 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1268 Base = RHS;
1269 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1270 return true;
1271 }
1272
1273 // There was no shift, whatever else we find.
1274 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1275
1276 AArch64_AM::ShiftExtendType Ext;
1277 // Try to match an unshifted extend on the LHS.
1278 if (IsExtendedRegisterWorthFolding &&
1279 (Ext = getExtendTypeForNode(LHS, true)) !=
1280 AArch64_AM::InvalidShiftExtend) {
1281 Base = RHS;
1282 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1283 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1284 MVT::i32);
1285 if (isWorthFoldingAddr(LHS, Size))
1286 return true;
1287 }
1288
1289 // Try to match an unshifted extend on the RHS.
1290 if (IsExtendedRegisterWorthFolding &&
1291 (Ext = getExtendTypeForNode(RHS, true)) !=
1292 AArch64_AM::InvalidShiftExtend) {
1293 Base = LHS;
1294 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1295 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1296 MVT::i32);
1297 if (isWorthFoldingAddr(RHS, Size))
1298 return true;
1299 }
1300
1301 return false;
1302}
1303
1304// Check if the given immediate is preferred by ADD. If an immediate can be
1305// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1306// encoded by one MOVZ, return true.
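// For example, 0xfff is ADD-encodable and 0x123000 is "ADD LSL #12"-encodable
// but not a single MOVZ, so both are preferred; 0x30000 is rejected because a
// single "MOVZ #0x3, LSL #16" is cheaper.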
1307static bool isPreferredADD(int64_t ImmOff) {
1308 // Constant in [0x0, 0xfff] can be encoded in ADD.
1309 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1310 return true;
1311 // Check if it can be encoded in an "ADD LSL #12".
1312 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1313 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1314 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1315 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1316 return false;
1317}
1318
1319bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1320 SDValue &Base, SDValue &Offset,
1321 SDValue &SignExtend,
1322 SDValue &DoShift) {
1323 if (N.getOpcode() != ISD::ADD)
1324 return false;
1325 SDValue LHS = N.getOperand(0);
1326 SDValue RHS = N.getOperand(1);
1327 SDLoc DL(N);
1328
1329 // Check if this particular node is reused in any non-memory related
1330 // operation. If yes, do not try to fold this node into the address
1331 // computation, since the computation will be kept.
1332 const SDNode *Node = N.getNode();
1333 for (SDNode *UI : Node->uses()) {
1334 if (!isa<MemSDNode>(*UI))
1335 return false;
1336 }
1337
1338 // Watch out if RHS is a wide immediate: it cannot be selected into the
1339 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1340 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1341 // instructions like:
1342 // MOV X0, WideImmediate
1343 // ADD X1, BaseReg, X0
1344 // LDR X2, [X1, 0]
1345 // For such situation, using [BaseReg, XReg] addressing mode can save one
1346 // ADD/SUB:
1347 // MOV X0, WideImmediate
1348 // LDR X2, [BaseReg, X0]
1349 if (isa<ConstantSDNode>(RHS)) {
1350 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1351 // Skip if the immediate can be selected by the load/store addressing mode.
1352 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1353 // checked by using -ImmOff).
1354 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1355 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1356 return false;
1357
1358 SDValue Ops[] = { RHS };
1359 SDNode *MOVI =
1360 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1361 SDValue MOVIV = SDValue(MOVI, 0);
1362 // This ADD of two X register will be selected into [Reg+Reg] mode.
1363 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1364 }
1365
1366 // Remember if it is worth folding N when it produces extended register.
1367 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1368
1369 // Try to match a shifted extend on the RHS.
1370 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1371 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1372 Base = LHS;
1373 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1374 return true;
1375 }
1376
1377 // Try to match a shifted extend on the LHS.
1378 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1379 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1380 Base = RHS;
1381 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1382 return true;
1383 }
1384
1385 // Match any non-shifted, non-extend, non-immediate add expression.
1386 Base = LHS;
1387 Offset = RHS;
1388 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1389 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1390 // Reg1 + Reg2 is free: no check needed.
1391 return true;
1392}
1393
1394SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1395 static const unsigned RegClassIDs[] = {
1396 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1397 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1398 AArch64::dsub2, AArch64::dsub3};
1399
1400 return createTuple(Regs, RegClassIDs, SubRegs);
1401}
1402
1403SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1404 static const unsigned RegClassIDs[] = {
1405 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1406 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1407 AArch64::qsub2, AArch64::qsub3};
1408
1409 return createTuple(Regs, RegClassIDs, SubRegs);
1410}
1411
1412SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1413 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1414 AArch64::ZPR3RegClassID,
1415 AArch64::ZPR4RegClassID};
1416 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1417 AArch64::zsub2, AArch64::zsub3};
1418
1419 return createTuple(Regs, RegClassIDs, SubRegs);
1420}
1421
1422SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1423 assert(Regs.size() == 2 || Regs.size() == 4);
1424
1425 // The createTuple interface requires 3 RegClassIDs for each possible
1426 // tuple type even though we only have them for ZPR2 and ZPR4.
1427 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1428 AArch64::ZPR4Mul4RegClassID};
1429 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1430 AArch64::zsub2, AArch64::zsub3};
1431 return createTuple(Regs, RegClassIDs, SubRegs);
1432}
1433
1434SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1435 const unsigned RegClassIDs[],
1436 const unsigned SubRegs[]) {
1437 // There's no special register-class for a vector-list of 1 element: it's just
1438 // a vector.
1439 if (Regs.size() == 1)
1440 return Regs[0];
1441
1442 assert(Regs.size() >= 2 && Regs.size() <= 4);
1443
1444 SDLoc DL(Regs[0]);
1445
1446 SmallVector<SDValue, 4> Ops;
1447
1448 // First operand of REG_SEQUENCE is the desired RegClass.
1449 Ops.push_back(
1450 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1451
1452 // Then we get pairs of source & subregister-position for the components.
1453 for (unsigned i = 0; i < Regs.size(); ++i) {
1454 Ops.push_back(Regs[i]);
1455 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1456 }
1457
1458 SDNode *N =
1459 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1460 return SDValue(N, 0);
1461}
1462
1463void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1464 bool isExt) {
1465 SDLoc dl(N);
1466 EVT VT = N->getValueType(0);
1467
1468 unsigned ExtOff = isExt;
1469
1470 // Form a REG_SEQUENCE to force register allocation.
1471 unsigned Vec0Off = ExtOff + 1;
1472 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1473 N->op_begin() + Vec0Off + NumVecs);
1474 SDValue RegSeq = createQTuple(Regs);
1475
1476 SmallVector<SDValue, 6> Ops;
1477 if (isExt)
1478 Ops.push_back(N->getOperand(1));
1479 Ops.push_back(RegSeq);
1480 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1481 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1482}
1483
1484bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1485 LoadSDNode *LD = cast<LoadSDNode>(N);
1486 if (LD->isUnindexed())
1487 return false;
1488 EVT VT = LD->getMemoryVT();
1489 EVT DstVT = N->getValueType(0);
1490 ISD::MemIndexedMode AM = LD->getAddressingMode();
1491 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1492
1493 // We're not doing validity checking here. That was done when checking
1494 // if we should mark the load as indexed or not. We're just selecting
1495 // the right instruction.
1496 unsigned Opcode = 0;
1497
1498 ISD::LoadExtType ExtType = LD->getExtensionType();
1499 bool InsertTo64 = false;
1500 if (VT == MVT::i64)
1501 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1502 else if (VT == MVT::i32) {
1503 if (ExtType == ISD::NON_EXTLOAD)
1504 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1505 else if (ExtType == ISD::SEXTLOAD)
1506 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1507 else {
1508 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1509 InsertTo64 = true;
1510 // The result of the load is only i32. It's the subreg_to_reg that makes
1511 // it into an i64.
1512 DstVT = MVT::i32;
1513 }
1514 } else if (VT == MVT::i16) {
1515 if (ExtType == ISD::SEXTLOAD) {
1516 if (DstVT == MVT::i64)
1517 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1518 else
1519 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1520 } else {
1521 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1522 InsertTo64 = DstVT == MVT::i64;
1523 // The result of the load is only i32. It's the subreg_to_reg that makes
1524 // it into an i64.
1525 DstVT = MVT::i32;
1526 }
1527 } else if (VT == MVT::i8) {
1528 if (ExtType == ISD::SEXTLOAD) {
1529 if (DstVT == MVT::i64)
1530 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1531 else
1532 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1533 } else {
1534 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1535 InsertTo64 = DstVT == MVT::i64;
1536 // The result of the load is only i32. It's the subreg_to_reg that makes
1537 // it into an i64.
1538 DstVT = MVT::i32;
1539 }
1540 } else if (VT == MVT::f16) {
1541 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1542 } else if (VT == MVT::bf16) {
1543 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1544 } else if (VT == MVT::f32) {
1545 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1546 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1547 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1548 } else if (VT.is128BitVector()) {
1549 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1550 } else
1551 return false;
1552 SDValue Chain = LD->getChain();
1553 SDValue Base = LD->getBasePtr();
1554 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1555 int OffsetVal = (int)OffsetOp->getZExtValue();
1556 SDLoc dl(N);
1557 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1558 SDValue Ops[] = { Base, Offset, Chain };
1559 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1560 MVT::Other, Ops);
1561
1562 // Transfer memoperands.
1563 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1564 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1565
1566 // Either way, we're replacing the node, so tell the caller that.
1567 SDValue LoadedVal = SDValue(Res, 1);
1568 if (InsertTo64) {
1569 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1570 LoadedVal =
1571 SDValue(CurDAG->getMachineNode(
1572 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1573 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1574 SubReg),
1575 0);
1576 }
1577
1578 ReplaceUses(SDValue(N, 0), LoadedVal);
1579 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1580 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1581 CurDAG->RemoveDeadNode(N);
1582 return true;
1583}
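// For example, a post-incremented extending load of an i8 into an i64 result
// (any- or zero-extending) is selected above as LDRBBpost; the 32-bit data
// result is then widened to i64 with SUBREG_TO_REG(sub_32), and the machine
// node's write-back, data and chain results replace the original load's values.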
1584
1585void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1586 unsigned SubRegIdx) {
1587 SDLoc dl(N);
1588 EVT VT = N->getValueType(0);
1589 SDValue Chain = N->getOperand(0);
1590
1591 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1592 Chain};
1593
1594 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1595
1596 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1597 SDValue SuperReg = SDValue(Ld, 0);
1598 for (unsigned i = 0; i < NumVecs; ++i)
1599 ReplaceUses(SDValue(N, i),
1600 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1601
1602 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1603
1604 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1605 // because it's too simple to have needed special treatment during lowering.
1606 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1607 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1608 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1609 }
1610
1611 CurDAG->RemoveDeadNode(N);
1612}
1613
1614void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1615 unsigned Opc, unsigned SubRegIdx) {
1616 SDLoc dl(N);
1617 EVT VT = N->getValueType(0);
1618 SDValue Chain = N->getOperand(0);
1619
1620 SDValue Ops[] = {N->getOperand(1), // Mem operand
1621 N->getOperand(2), // Incremental
1622 Chain};
1623
1624 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1625 MVT::Untyped, MVT::Other};
1626
1627 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1628
1629 // Update uses of write back register
1630 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1631
1632 // Update uses of vector list
1633 SDValue SuperReg = SDValue(Ld, 1);
1634 if (NumVecs == 1)
1635 ReplaceUses(SDValue(N, 0), SuperReg);
1636 else
1637 for (unsigned i = 0; i < NumVecs; ++i)
1638 ReplaceUses(SDValue(N, i),
1639 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1640
1641 // Update the chain
1642 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1643 CurDAG->RemoveDeadNode(N);
1644}
1645
1646/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1647/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1648/// new Base and an SDValue representing the new offset.
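/// For example (illustrative assembly): a reg+imm match keeps the _ri opcode
/// and yields addressing of the form "[x0, #1, mul vl]", a reg+reg match
/// switches to the _rr opcode and yields "[x0, x1, lsl #2]"-style addressing,
/// and otherwise the incoming base and offset are returned unchanged with the
/// _ri opcode.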
1649std::tuple<unsigned, SDValue, SDValue>
1650AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1651 unsigned Opc_ri,
1652 const SDValue &OldBase,
1653 const SDValue &OldOffset,
1654 unsigned Scale) {
1655 SDValue NewBase = OldBase;
1656 SDValue NewOffset = OldOffset;
1657 // Detect a possible Reg+Imm addressing mode.
1658 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1659 N, OldBase, NewBase, NewOffset);
1660
1661 // Detect a possible reg+reg addressing mode, but only if we haven't already
1662 // detected a Reg+Imm one.
1663 const bool IsRegReg =
1664 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1665
1666 // Select the instruction.
1667 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1668}
1669
1670enum class SelectTypeKind {
1671 Int1 = 0,
1672 Int = 1,
1673 FP = 2,
1674 AnyType = 3,
1675};
1676
1677/// This function selects an opcode from a list of opcodes, which is
1678/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1679/// element types, in this order.
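/// For example, with Kind == SelectTypeKind::Int, nxv16i8 selects Opcodes[0],
/// nxv8i16 selects Opcodes[1], nxv4i32 selects Opcodes[2] and nxv2i64 selects
/// Opcodes[3]; with Kind == SelectTypeKind::FP, nxv8bf16 forces Key to 16 and
/// therefore also selects Opcodes[0].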
1680template <SelectTypeKind Kind>
1681static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1682 // Only match scalable vector VTs
1683 if (!VT.isScalableVector())
1684 return 0;
1685
1686 EVT EltVT = VT.getVectorElementType();
1687 unsigned Key = VT.getVectorMinNumElements();
1688 switch (Kind) {
1689 case SelectTypeKind::AnyType:
1690 break;
1691 case SelectTypeKind::Int:
1692 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1693 EltVT != MVT::i64)
1694 return 0;
1695 break;
1696 case SelectTypeKind::Int1:
1697 if (EltVT != MVT::i1)
1698 return 0;
1699 break;
1700 case SelectTypeKind::FP:
1701 if (EltVT == MVT::bf16)
1702 Key = 16;
1703 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1704 EltVT != MVT::f64)
1705 return 0;
1706 break;
1707 }
1708
1709 unsigned Offset;
1710 switch (Key) {
1711 case 16: // 8-bit or bf16
1712 Offset = 0;
1713 break;
1714 case 8: // 16-bit
1715 Offset = 1;
1716 break;
1717 case 4: // 32-bit
1718 Offset = 2;
1719 break;
1720 case 2: // 64-bit
1721 Offset = 3;
1722 break;
1723 default:
1724 return 0;
1725 }
1726
1727 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1728}
1729
1730// This function is almost identical to SelectWhilePair, but has an
1731// extra check on the range of the immediate operand.
1732// TODO: Merge these two functions together at some point?
1733void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1734 // Immediate can be either 0 or 1.
1735 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1736 if (Imm->getZExtValue() > 1)
1737 return;
1738
1739 SDLoc DL(N);
1740 EVT VT = N->getValueType(0);
1741 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1742 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1743 SDValue SuperReg = SDValue(WhilePair, 0);
1744
1745 for (unsigned I = 0; I < 2; ++I)
1746 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1747 AArch64::psub0 + I, DL, VT, SuperReg));
1748
1749 CurDAG->RemoveDeadNode(N);
1750}
1751
1752void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1753 SDLoc DL(N);
1754 EVT VT = N->getValueType(0);
1755
1756 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1757
1758 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1759 SDValue SuperReg = SDValue(WhilePair, 0);
1760
1761 for (unsigned I = 0; I < 2; ++I)
1762 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1763 AArch64::psub0 + I, DL, VT, SuperReg));
1764
1765 CurDAG->RemoveDeadNode(N);
1766}
1767
1768void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1769 unsigned Opcode) {
1770 EVT VT = N->getValueType(0);
1771 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1772 SDValue Ops = createZTuple(Regs);
1773 SDLoc DL(N);
1774 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1775 SDValue SuperReg = SDValue(Intrinsic, 0);
1776 for (unsigned i = 0; i < NumVecs; ++i)
1777 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1778 AArch64::zsub0 + i, DL, VT, SuperReg));
1779
1780 CurDAG->RemoveDeadNode(N);
1781}
1782
1783void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1784 unsigned NumVecs,
1785 bool IsZmMulti,
1786 unsigned Opcode,
1787 bool HasPred) {
1788 assert(Opcode != 0 && "Unexpected opcode");
1789
1790 SDLoc DL(N);
1791 EVT VT = N->getValueType(0);
1792 unsigned FirstVecIdx = HasPred ? 2 : 1;
1793
1794 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1795 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1796 N->op_begin() + StartIdx + NumVecs);
1797 return createZMulTuple(Regs);
1798 };
1799
1800 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1801
1802 SDValue Zm;
1803 if (IsZmMulti)
1804 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1805 else
1806 Zm = N->getOperand(NumVecs + FirstVecIdx);
1807
1808 SDNode *Intrinsic;
1809 if (HasPred)
1810 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1811 N->getOperand(1), Zdn, Zm);
1812 else
1813 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1814 SDValue SuperReg = SDValue(Intrinsic, 0);
1815 for (unsigned i = 0; i < NumVecs; ++i)
1816 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1817 AArch64::zsub0 + i, DL, VT, SuperReg));
1818
1819 CurDAG->RemoveDeadNode(N);
1820}
1821
1822void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1823 unsigned Scale, unsigned Opc_ri,
1824 unsigned Opc_rr, bool IsIntr) {
1825 assert(Scale < 5 && "Invalid scaling value.");
1826 SDLoc DL(N);
1827 EVT VT = N->getValueType(0);
1828 SDValue Chain = N->getOperand(0);
1829
1830 // Optimize addressing mode.
1831 SDValue Base, Offset;
1832 unsigned Opc;
1833 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1834 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1835 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1836
1837 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1838 Base, // Memory operand
1839 Offset, Chain};
1840
1841 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1842
1843 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1844 SDValue SuperReg = SDValue(Load, 0);
1845 for (unsigned i = 0; i < NumVecs; ++i)
1846 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1847 AArch64::zsub0 + i, DL, VT, SuperReg));
1848
1849 // Copy chain
1850 unsigned ChainIdx = NumVecs;
1851 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1852 CurDAG->RemoveDeadNode(N);
1853}
1854
1855void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1856 unsigned NumVecs,
1857 unsigned Scale,
1858 unsigned Opc_ri,
1859 unsigned Opc_rr) {
1860 assert(Scale < 4 && "Invalid scaling value.");
1861 SDLoc DL(N);
1862 EVT VT = N->getValueType(0);
1863 SDValue Chain = N->getOperand(0);
1864
1865 SDValue PNg = N->getOperand(2);
1866 SDValue Base = N->getOperand(3);
1867 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1868 unsigned Opc;
1869 std::tie(Opc, Base, Offset) =
1870 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1871
1872 SDValue Ops[] = {PNg, // Predicate-as-counter
1873 Base, // Memory operand
1874 Offset, Chain};
1875
1876 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1877
1878 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1879 SDValue SuperReg = SDValue(Load, 0);
1880 for (unsigned i = 0; i < NumVecs; ++i)
1881 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1882 AArch64::zsub0 + i, DL, VT, SuperReg));
1883
1884 // Copy chain
1885 unsigned ChainIdx = NumVecs;
1886 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1887 CurDAG->RemoveDeadNode(N);
1888}
1889
1890void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1891 unsigned Opcode) {
1892 if (N->getValueType(0) != MVT::nxv4f32)
1893 return;
1894 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1895}
1896
1897void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1898 unsigned NumOutVecs,
1899 unsigned Opc, uint32_t MaxImm) {
1900 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1901 if (Imm->getZExtValue() > MaxImm)
1902 return;
1903
1904 SDValue ZtValue;
1905 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1906 return;
1907 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1908 SDLoc DL(Node);
1909 EVT VT = Node->getValueType(0);
1910
1911 SDNode *Instruction =
1912 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1913 SDValue SuperReg = SDValue(Instruction, 0);
1914
1915 for (unsigned I = 0; I < NumOutVecs; ++I)
1916 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1917 AArch64::zsub0 + I, DL, VT, SuperReg));
1918
1919 // Copy chain
1920 unsigned ChainIdx = NumOutVecs;
1921 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
1922 CurDAG->RemoveDeadNode(Node);
1923}
1924
1925void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1926 unsigned Op) {
1927 SDLoc DL(N);
1928 EVT VT = N->getValueType(0);
1929
1930 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1931 SDValue Zd = createZMulTuple(Regs);
1932 SDValue Zn = N->getOperand(1 + NumVecs);
1933 SDValue Zm = N->getOperand(2 + NumVecs);
1934
1935 SDValue Ops[] = {Zd, Zn, Zm};
1936
1937 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1938 SDValue SuperReg = SDValue(Intrinsic, 0);
1939 for (unsigned i = 0; i < NumVecs; ++i)
1940 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1941 AArch64::zsub0 + i, DL, VT, SuperReg));
1942
1943 CurDAG->RemoveDeadNode(N);
1944}
1945
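// Map an SME ZA tile base register plus a tile number onto the concrete tile
// register, rejecting out-of-range tile numbers. For example, ZAS0 with
// TileNum 2 becomes ZAS2, while ZAB0 (or plain ZA) only accepts tile 0.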
1946bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1947 switch (BaseReg) {
1948 default:
1949 return false;
1950 case AArch64::ZA:
1951 case AArch64::ZAB0:
1952 if (TileNum == 0)
1953 break;
1954 return false;
1955 case AArch64::ZAH0:
1956 if (TileNum <= 1)
1957 break;
1958 return false;
1959 case AArch64::ZAS0:
1960 if (TileNum <= 3)
1961 break;
1962 return false;
1963 case AArch64::ZAD0:
1964 if (TileNum <= 7)
1965 break;
1966 return false;
1967 }
1968
1969 BaseReg += TileNum;
1970 return true;
1971}
1972
1973template <unsigned MaxIdx, unsigned Scale>
1974void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1975 unsigned BaseReg, unsigned Op) {
1976 unsigned TileNum = 0;
1977 if (BaseReg != AArch64::ZA)
1978 TileNum = N->getConstantOperandVal(2);
1979
1980 if (!SelectSMETile(BaseReg, TileNum))
1981 return;
1982
1983 SDValue SliceBase, Base, Offset;
1984 if (BaseReg == AArch64::ZA)
1985 SliceBase = N->getOperand(2);
1986 else
1987 SliceBase = N->getOperand(3);
1988
1989 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1990 return;
1991
1992 SDLoc DL(N);
1993 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1994 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1995 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1996
1997 EVT VT = N->getValueType(0);
1998 for (unsigned I = 0; I < NumVecs; ++I)
1999 ReplaceUses(SDValue(N, I),
2000 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2001 SDValue(Mov, 0)));
2002 // Copy chain
2003 unsigned ChainIdx = NumVecs;
2004 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2005 CurDAG->RemoveDeadNode(N);
2006}
2007
2008void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2009 unsigned Op, unsigned MaxIdx,
2010 unsigned Scale, unsigned BaseReg) {
2011 // The slice operand can be in different positions:
2012 // Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2013 // Tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2014 SDValue SliceBase = N->getOperand(2);
2015 if (BaseReg != AArch64::ZA)
2016 SliceBase = N->getOperand(3);
2017
2018 SDValue Base, Offset;
2019 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2020 return;
2021 // The correct ZA tile number is computed when the machine instruction is
2022 // emitted; see EmitZAInstr.
2023 // The DAG cannot select a ZA tile as an output register with a ZReg.
2024 SDLoc DL(N);
2025 SmallVector<SDValue, 4> Ops;
2026 if (BaseReg != AArch64::ZA)
2027 Ops.push_back(N->getOperand(2));
2028 Ops.push_back(Base);
2029 Ops.push_back(Offset);
2030 Ops.push_back(N->getOperand(0)); // Chain
2031 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2032
2033 EVT VT = N->getValueType(0);
2034 for (unsigned I = 0; I < NumVecs; ++I)
2035 ReplaceUses(SDValue(N, I),
2036 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2037 SDValue(Mov, 0)));
2038
2039 // Copy chain
2040 unsigned ChainIdx = NumVecs;
2041 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2042 CurDAG->RemoveDeadNode(N);
2043}
2044
2045void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2046 unsigned NumOutVecs,
2047 bool IsTupleInput,
2048 unsigned Opc) {
2049 SDLoc DL(N);
2050 EVT VT = N->getValueType(0);
2051 unsigned NumInVecs = N->getNumOperands() - 1;
2052
2053 SmallVector<SDValue, 4> Ops;
2054 if (IsTupleInput) {
2055 assert((NumInVecs == 2 || NumInVecs == 4) &&
2056 "Don't know how to handle multi-register input!");
2057 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2058 N->op_begin() + 1 + NumInVecs);
2059 Ops.push_back(createZMulTuple(Regs));
2060 } else {
2061 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2062 for (unsigned I = 0; I < NumInVecs; I++)
2063 Ops.push_back(N->getOperand(1 + I));
2064 }
2065
2066 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2067 SDValue SuperReg = SDValue(Res, 0);
2068
2069 for (unsigned I = 0; I < NumOutVecs; I++)
2070 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2071 AArch64::zsub0 + I, DL, VT, SuperReg));
2072 CurDAG->RemoveDeadNode(N);
2073}
2074
2075void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2076 unsigned Opc) {
2077 SDLoc dl(N);
2078 EVT VT = N->getOperand(2)->getValueType(0);
2079
2080 // Form a REG_SEQUENCE to force register allocation.
2081 bool Is128Bit = VT.getSizeInBits() == 128;
2082 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2083 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2084
2085 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2086 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2087
2088 // Transfer memoperands.
2089 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2090 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2091
2092 ReplaceNode(N, St);
2093}
2094
2095void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2096 unsigned Scale, unsigned Opc_rr,
2097 unsigned Opc_ri) {
2098 SDLoc dl(N);
2099
2100 // Form a REG_SEQUENCE to force register allocation.
2101 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2102 SDValue RegSeq = createZTuple(Regs);
2103
2104 // Optimize addressing mode.
2105 unsigned Opc;
2106 SDValue Base, Offset;
2107 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2108 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2109 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2110
2111 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2112 Base, // address
2113 Offset, // offset
2114 N->getOperand(0)}; // chain
2115 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2116
2117 ReplaceNode(N, St);
2118}
2119
2120bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2121 SDValue &OffImm) {
2122 SDLoc dl(N);
2123 const DataLayout &DL = CurDAG->getDataLayout();
2124 const TargetLowering *TLI = getTargetLowering();
2125
2126 // Try to match it for the frame address
2127 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2128 int FI = FINode->getIndex();
2129 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2130 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2131 return true;
2132 }
2133
2134 return false;
2135}
2136
2137void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2138 unsigned Opc) {
2139 SDLoc dl(N);
2140 EVT VT = N->getOperand(2)->getValueType(0);
2141 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2142 MVT::Other}; // Type for the Chain
2143
2144 // Form a REG_SEQUENCE to force register allocation.
2145 bool Is128Bit = VT.getSizeInBits() == 128;
2146 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2147 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2148
2149 SDValue Ops[] = {RegSeq,
2150 N->getOperand(NumVecs + 1), // base register
2151 N->getOperand(NumVecs + 2), // Incremental
2152 N->getOperand(0)}; // Chain
2153 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2154
2155 ReplaceNode(N, St);
2156}
2157
2158namespace {
2159/// WidenVector - Given a value in the V64 register class, produce the
2160/// equivalent value in the V128 register class.
2161class WidenVector {
2162 SelectionDAG &DAG;
2163
2164public:
2165 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2166
2167 SDValue operator()(SDValue V64Reg) {
2168 EVT VT = V64Reg.getValueType();
2169 unsigned NarrowSize = VT.getVectorNumElements();
2170 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2171 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2172 SDLoc DL(V64Reg);
2173
2174 SDValue Undef =
2175 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2176 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2177 }
2178};
2179} // namespace
2180
2181/// NarrowVector - Given a value in the V128 register class, produce the
2182/// equivalent value in the V64 register class.
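/// For example, a v4f32 value in a Q register is narrowed to v2f32 by
/// extracting its dsub subregister; WidenVector above performs the inverse by
/// inserting the 64-bit value into the dsub subregister of an IMPLICIT_DEF.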
2183 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2184 EVT VT = V128Reg.getValueType();
2185 unsigned WideSize = VT.getVectorNumElements();
2186 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2187 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2188
2189 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2190 V128Reg);
2191}
2192
2193void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2194 unsigned Opc) {
2195 SDLoc dl(N);
2196 EVT VT = N->getValueType(0);
2197 bool Narrow = VT.getSizeInBits() == 64;
2198
2199 // Form a REG_SEQUENCE to force register allocation.
2200 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2201
2202 if (Narrow)
2203 transform(Regs, Regs.begin(),
2204 WidenVector(*CurDAG));
2205
2206 SDValue RegSeq = createQTuple(Regs);
2207
2208 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2209
2210 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2211
2212 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2213 N->getOperand(NumVecs + 3), N->getOperand(0)};
2214 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2215 SDValue SuperReg = SDValue(Ld, 0);
2216
2217 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2218 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2219 AArch64::qsub2, AArch64::qsub3 };
2220 for (unsigned i = 0; i < NumVecs; ++i) {
2221 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2222 if (Narrow)
2223 NV = NarrowVector(NV, *CurDAG);
2224 ReplaceUses(SDValue(N, i), NV);
2225 }
2226
2227 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2228 CurDAG->RemoveDeadNode(N);
2229}
2230
2231void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2232 unsigned Opc) {
2233 SDLoc dl(N);
2234 EVT VT = N->getValueType(0);
2235 bool Narrow = VT.getSizeInBits() == 64;
2236
2237 // Form a REG_SEQUENCE to force register allocation.
2238 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2239
2240 if (Narrow)
2241 transform(Regs, Regs.begin(),
2242 WidenVector(*CurDAG));
2243
2244 SDValue RegSeq = createQTuple(Regs);
2245
2246 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2247 RegSeq->getValueType(0), MVT::Other};
2248
2249 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2250
2251 SDValue Ops[] = {RegSeq,
2252 CurDAG->getTargetConstant(LaneNo, dl,
2253 MVT::i64), // Lane Number
2254 N->getOperand(NumVecs + 2), // Base register
2255 N->getOperand(NumVecs + 3), // Incremental
2256 N->getOperand(0)};
2257 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2258
2259 // Update uses of the write back register
2260 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2261
2262 // Update uses of the vector list
2263 SDValue SuperReg = SDValue(Ld, 1);
2264 if (NumVecs == 1) {
2265 ReplaceUses(SDValue(N, 0),
2266 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2267 } else {
2268 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2269 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2270 AArch64::qsub2, AArch64::qsub3 };
2271 for (unsigned i = 0; i < NumVecs; ++i) {
2272 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2273 SuperReg);
2274 if (Narrow)
2275 NV = NarrowVector(NV, *CurDAG);
2276 ReplaceUses(SDValue(N, i), NV);
2277 }
2278 }
2279
2280 // Update the Chain
2281 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2282 CurDAG->RemoveDeadNode(N);
2283}
2284
2285void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2286 unsigned Opc) {
2287 SDLoc dl(N);
2288 EVT VT = N->getOperand(2)->getValueType(0);
2289 bool Narrow = VT.getSizeInBits() == 64;
2290
2291 // Form a REG_SEQUENCE to force register allocation.
2292 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2293
2294 if (Narrow)
2295 transform(Regs, Regs.begin(),
2296 WidenVector(*CurDAG));
2297
2298 SDValue RegSeq = createQTuple(Regs);
2299
2300 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2301
2302 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2303 N->getOperand(NumVecs + 3), N->getOperand(0)};
2304 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2305
2306 // Transfer memoperands.
2307 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2308 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2309
2310 ReplaceNode(N, St);
2311}
2312
2313void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2314 unsigned Opc) {
2315 SDLoc dl(N);
2316 EVT VT = N->getOperand(2)->getValueType(0);
2317 bool Narrow = VT.getSizeInBits() == 64;
2318
2319 // Form a REG_SEQUENCE to force register allocation.
2320 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2321
2322 if (Narrow)
2323 transform(Regs, Regs.begin(),
2324 WidenVector(*CurDAG));
2325
2326 SDValue RegSeq = createQTuple(Regs);
2327
2328 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2329 MVT::Other};
2330
2331 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2332
2333 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2334 N->getOperand(NumVecs + 2), // Base Register
2335 N->getOperand(NumVecs + 3), // Incremental
2336 N->getOperand(0)};
2337 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2338
2339 // Transfer memoperands.
2340 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2341 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2342
2343 ReplaceNode(N, St);
2344}
2345
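// Match an AND-based unsigned bitfield extract. For example,
// (and (srl x, 4), 0xff) yields LSB = 4 and MSB = 11 and is selected as
// UBFMWri/UBFMXri, i.e. UBFX x, #4, #8.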
2346 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2347 unsigned &Opc, SDValue &Opd0,
2348 unsigned &LSB, unsigned &MSB,
2349 unsigned NumberOfIgnoredLowBits,
2350 bool BiggerPattern) {
2351 assert(N->getOpcode() == ISD::AND &&
2352 "N must be a AND operation to call this function");
2353
2354 EVT VT = N->getValueType(0);
2355
2356 // We could test the type of VT here and return false when the type does
2357 // not match, but since that check is already done before this call in the
2358 // current context, we turn it into an assert to avoid redundant code.
2359 assert((VT == MVT::i32 || VT == MVT::i64) &&
2360 "Type checking must have been done before calling this function");
2361
2362 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2363 // changed the AND node to a 32-bit mask operation. We'll have to
2364 // undo that as part of the transform here if we want to catch all
2365 // the opportunities.
2366 // Currently the NumberOfIgnoredLowBits argument helps to recover
2367 // from these situations when matching bigger pattern (bitfield insert).
2368
2369 // For unsigned extracts, check for a shift right and mask
2370 uint64_t AndImm = 0;
2371 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2372 return false;
2373
2374 const SDNode *Op0 = N->getOperand(0).getNode();
2375
2376 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2377 // simplified. Try to undo that
2378 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2379
2380 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2381 if (AndImm & (AndImm + 1))
2382 return false;
2383
2384 bool ClampMSB = false;
2385 uint64_t SrlImm = 0;
2386 // Handle the SRL + ANY_EXTEND case.
2387 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2388 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2389 // Extend the incoming operand of the SRL to 64-bit.
2390 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2391 // Make sure to clamp the MSB so that we preserve the semantics of the
2392 // original operations.
2393 ClampMSB = true;
2394 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2395 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2396 SrlImm)) {
2397 // If the shift result was truncated, we can still combine them.
2398 Opd0 = Op0->getOperand(0).getOperand(0);
2399
2400 // Use the type of SRL node.
2401 VT = Opd0->getValueType(0);
2402 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2403 Opd0 = Op0->getOperand(0);
2404 ClampMSB = (VT == MVT::i32);
2405 } else if (BiggerPattern) {
2406 // Let's pretend a 0 shift right has been performed.
2407 // The resulting code will be at least as good as the original one,
2408 // and it may expose more opportunities for the bitfield insert pattern.
2409 // FIXME: Currently we limit this to the bigger pattern, because
2410 // some optimizations expect AND and not UBFM.
2411 Opd0 = N->getOperand(0);
2412 } else
2413 return false;
2414
2415 // Bail out on large immediates. This happens when no proper
2416 // combining/constant folding was performed.
2417 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2418 LLVM_DEBUG(
2419 (dbgs() << N
2420 << ": Found large shift immediate, this should not happen\n"));
2421 return false;
2422 }
2423
2424 LSB = SrlImm;
2425 MSB = SrlImm +
2426 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2427 : llvm::countr_one<uint64_t>(AndImm)) -
2428 1;
2429 if (ClampMSB)
2430 // Since we're moving the extend before the right shift operation, we need
2431 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2432 // the zeros which would get shifted in with the original right shift
2433 // operation.
2434 MSB = MSB > 31 ? 31 : MSB;
2435
2436 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2437 return true;
2438}
2439
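// Match a signed bitfield extract fed by SIGN_EXTEND_INREG. For example,
// (sign_extend_inreg (srl x, 8), i8) on an i32 value gives Immr = 8 and
// Imms = 15 and is selected as SBFMWri, i.e. SBFX x, #8, #8.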
2440static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2441 SDValue &Opd0, unsigned &Immr,
2442 unsigned &Imms) {
2443 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2444
2445 EVT VT = N->getValueType(0);
2446 unsigned BitWidth = VT.getSizeInBits();
2447 assert((VT == MVT::i32 || VT == MVT::i64) &&
2448 "Type checking must have been done before calling this function");
2449
2450 SDValue Op = N->getOperand(0);
2451 if (Op->getOpcode() == ISD::TRUNCATE) {
2452 Op = Op->getOperand(0);
2453 VT = Op->getValueType(0);
2454 BitWidth = VT.getSizeInBits();
2455 }
2456
2457 uint64_t ShiftImm;
2458 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2459 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2460 return false;
2461
2462 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2463 if (ShiftImm + Width > BitWidth)
2464 return false;
2465
2466 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2467 Opd0 = Op.getOperand(0);
2468 Immr = ShiftImm;
2469 Imms = ShiftImm + Width - 1;
2470 return true;
2471}
2472
2473static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2474 SDValue &Opd0, unsigned &LSB,
2475 unsigned &MSB) {
2476 // We are looking for the following pattern, which extracts a run of
2477 // contiguous bits from the source value and places them at the LSB of the
2478 // destination value; all other bits of the destination value are set to zero:
2479 //
2480 // Value2 = AND Value, MaskImm
2481 // SRL Value2, ShiftImm
2482 //
2483 // where MaskImm >> ShiftImm must be a mask of contiguous low bits giving the extracted width.
2484 //
2485 // This gets selected into a single UBFM:
2486 //
2487 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2488 //
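// For example, (srl (and x, 0xff0), 4): AndMask >> ShiftImm == 0xff is a
// mask, so this selects UBFM x, #4, #11, i.e. UBFX x, #4, #8.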
2489
2490 if (N->getOpcode() != ISD::SRL)
2491 return false;
2492
2493 uint64_t AndMask = 0;
2494 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2495 return false;
2496
2497 Opd0 = N->getOperand(0).getOperand(0);
2498
2499 uint64_t SrlImm = 0;
2500 if (!isIntImmediate(N->getOperand(1), SrlImm))
2501 return false;
2502
2503 // Check whether we really have several bits extract here.
2504 if (!isMask_64(AndMask >> SrlImm))
2505 return false;
2506
2507 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2508 LSB = SrlImm;
2509 MSB = llvm::Log2_64(AndMask);
2510 return true;
2511}
2512
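// Match a bitfield extract written as a shift of a shift. For example,
// (sra (shl x, 56), 56) on i64 gives Immr = 0 and Imms = 7 and is selected
// as SBFMXri, i.e. SXTB.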
2513static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2514 unsigned &Immr, unsigned &Imms,
2515 bool BiggerPattern) {
2516 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2517 "N must be a SHR/SRA operation to call this function");
2518
2519 EVT VT = N->getValueType(0);
2520
2521 // We could test the type of VT here and return false when the type does
2522 // not match, but since that check is already done before this call in the
2523 // current context, we turn it into an assert to avoid redundant code.
2524 assert((VT == MVT::i32 || VT == MVT::i64) &&
2525 "Type checking must have been done before calling this function");
2526
2527 // Check for AND + SRL doing several bits extract.
2528 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2529 return true;
2530
2531 // We're looking for a shift of a shift.
2532 uint64_t ShlImm = 0;
2533 uint64_t TruncBits = 0;
2534 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2535 Opd0 = N->getOperand(0).getOperand(0);
2536 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2537 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2538 // We are looking for a shift of a truncate. A truncate from i64 to i32 can
2539 // be treated as setting the high 32 bits to zero. Our strategy here is to
2540 // always generate a 64-bit UBFM; this consistency helps the later CSE pass
2541 // find more redundancy.
2542 Opd0 = N->getOperand(0).getOperand(0);
2543 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2544 VT = Opd0.getValueType();
2545 assert(VT == MVT::i64 && "the promoted type should be i64");
2546 } else if (BiggerPattern) {
2547 // Let's pretend a 0 shift left has been performed.
2548 // FIXME: Currently we limit this to the bigger pattern case,
2549 // because some optimizations expect AND and not UBFM
2550 Opd0 = N->getOperand(0);
2551 } else
2552 return false;
2553
2554 // Missing combines/constant folding may have left us with strange
2555 // constants.
2556 if (ShlImm >= VT.getSizeInBits()) {
2557 LLVM_DEBUG(
2558 (dbgs() << N
2559 << ": Found large shift immediate, this should not happen\n"));
2560 return false;
2561 }
2562
2563 uint64_t SrlImm = 0;
2564 if (!isIntImmediate(N->getOperand(1), SrlImm))
2565 return false;
2566
2567 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2568 "bad amount in shift node!");
2569 int immr = SrlImm - ShlImm;
2570 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2571 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2572 // SRA requires a signed extraction
2573 if (VT == MVT::i32)
2574 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2575 else
2576 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2577 return true;
2578}
2579
2580bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2581 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2582
2583 EVT VT = N->getValueType(0);
2584 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2585 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2586 return false;
2587
2588 uint64_t ShiftImm;
2589 SDValue Op = N->getOperand(0);
2590 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2591 return false;
2592
2593 SDLoc dl(N);
2594 // Extend the incoming operand of the shift to 64-bits.
2595 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2596 unsigned Immr = ShiftImm;
2597 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2598 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2599 CurDAG->getTargetConstant(Imms, dl, VT)};
2600 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2601 return true;
2602}
2603
2604static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2605 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2606 unsigned NumberOfIgnoredLowBits = 0,
2607 bool BiggerPattern = false) {
2608 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2609 return false;
2610
2611 switch (N->getOpcode()) {
2612 default:
2613 if (!N->isMachineOpcode())
2614 return false;
2615 break;
2616 case ISD::AND:
2617 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2618 NumberOfIgnoredLowBits, BiggerPattern);
2619 case ISD::SRL:
2620 case ISD::SRA:
2621 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2622
2623 case ISD::SIGN_EXTEND_INREG:
2624 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2625 }
2626
2627 unsigned NOpc = N->getMachineOpcode();
2628 switch (NOpc) {
2629 default:
2630 return false;
2631 case AArch64::SBFMWri:
2632 case AArch64::UBFMWri:
2633 case AArch64::SBFMXri:
2634 case AArch64::UBFMXri:
2635 Opc = NOpc;
2636 Opd0 = N->getOperand(0);
2637 Immr = N->getConstantOperandVal(1);
2638 Imms = N->getConstantOperandVal(2);
2639 return true;
2640 }
2641 // Unreachable
2642 return false;
2643}
2644
2645bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2646 unsigned Opc, Immr, Imms;
2647 SDValue Opd0;
2648 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2649 return false;
2650
2651 EVT VT = N->getValueType(0);
2652 SDLoc dl(N);
2653
2654 // If the bit extract operation is 64bit but the original type is 32bit, we
2655 // need to add one EXTRACT_SUBREG.
2656 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2657 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2658 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2659
2660 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2661 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2662 MVT::i32, SDValue(BFM, 0));
2663 ReplaceNode(N, Inner.getNode());
2664 return true;
2665 }
2666
2667 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2668 CurDAG->getTargetConstant(Imms, dl, VT)};
2669 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2670 return true;
2671}
2672
2673/// Does DstMask form a complementary pair with the mask provided by
2674 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
2675/// this asks whether DstMask zeroes precisely those bits that will be set by
2676/// the other half.
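/// For example, on i32 with no ignored high bits, DstMask 0xffffff00 and an
/// inserted value whose bits lie within 0x000000ff are complementary: the two
/// masks are disjoint and together cover all 32 bits.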
2677static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2678 unsigned NumberOfIgnoredHighBits, EVT VT) {
2679 assert((VT == MVT::i32 || VT == MVT::i64) &&
2680 "i32 or i64 mask type expected!");
2681 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2682
2683 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2684 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2685
2686 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2687 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2688}
2689
2690 // Look for bits that will be useful for later uses.
2691 // A bit is considered useless as soon as it is dropped and is never used
2692 // before it has been dropped.
2693 // E.g., looking for the useful bits of x:
2694 // 1. y = x & 0x7
2695 // 2. z = y >> 2
2696 // After #1, the useful bits of x are 0x7; they then live on
2697 // through y.
2698 // After #2, the useful bits of x are 0x4.
2699 // However, if x is used by an unpredictable instruction, then all its bits
2700 // are useful.
2701// E.g.
2702// 1. y = x & 0x7
2703// 2. z = y >> 2
2704// 3. str x, [@x]
2705static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2706
2707 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2708 unsigned Depth) {
2709 uint64_t Imm =
2710 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2711 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2712 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2713 getUsefulBits(Op, UsefulBits, Depth + 1);
2714}
2715
2716 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2717 uint64_t Imm, uint64_t MSB,
2718 unsigned Depth) {
2719 // inherit the bitwidth value
2720 APInt OpUsefulBits(UsefulBits);
2721 OpUsefulBits = 1;
2722
2723 if (MSB >= Imm) {
2724 OpUsefulBits <<= MSB - Imm + 1;
2725 --OpUsefulBits;
2726 // The interesting part will be in the lower part of the result
2727 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2728 // The interesting part was starting at Imm in the argument
2729 OpUsefulBits <<= Imm;
2730 } else {
2731 OpUsefulBits <<= MSB + 1;
2732 --OpUsefulBits;
2733 // The interesting part will be shifted in the result
2734 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2735 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2736 // The interesting part was at zero in the argument
2737 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2738 }
2739
2740 UsefulBits &= OpUsefulBits;
2741}
2742
2743static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2744 unsigned Depth) {
2745 uint64_t Imm =
2746 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2747 uint64_t MSB =
2748 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2749
2750 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2751}
2752
2753 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2754 unsigned Depth) {
2755 uint64_t ShiftTypeAndValue =
2756 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2757 APInt Mask(UsefulBits);
2758 Mask.clearAllBits();
2759 Mask.flipAllBits();
2760
2761 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2762 // Shift Left
2763 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2764 Mask <<= ShiftAmt;
2765 getUsefulBits(Op, Mask, Depth + 1);
2766 Mask.lshrInPlace(ShiftAmt);
2767 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2768 // Shift Right
2769 // We do not handle AArch64_AM::ASR, because the sign will change the
2770 // number of useful bits
2771 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2772 Mask.lshrInPlace(ShiftAmt);
2773 getUsefulBits(Op, Mask, Depth + 1);
2774 Mask <<= ShiftAmt;
2775 } else
2776 return;
2777
2778 UsefulBits &= Mask;
2779}
2780
2781static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2782 unsigned Depth) {
2783 uint64_t Imm =
2784 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2785 uint64_t MSB =
2786 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2787
2788 APInt OpUsefulBits(UsefulBits);
2789 OpUsefulBits = 1;
2790
2791 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2792 ResultUsefulBits.flipAllBits();
2793 APInt Mask(UsefulBits.getBitWidth(), 0);
2794
2795 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2796
2797 if (MSB >= Imm) {
2798 // The instruction is a BFXIL.
2799 uint64_t Width = MSB - Imm + 1;
2800 uint64_t LSB = Imm;
2801
2802 OpUsefulBits <<= Width;
2803 --OpUsefulBits;
2804
2805 if (Op.getOperand(1) == Orig) {
2806 // Copy the low bits from the result to bits starting from LSB.
2807 Mask = ResultUsefulBits & OpUsefulBits;
2808 Mask <<= LSB;
2809 }
2810
2811 if (Op.getOperand(0) == Orig)
2812 // Bits starting from LSB in the input contribute to the result.
2813 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2814 } else {
2815 // The instruction is a BFI.
2816 uint64_t Width = MSB + 1;
2817 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2818
2819 OpUsefulBits <<= Width;
2820 --OpUsefulBits;
2821 OpUsefulBits <<= LSB;
2822
2823 if (Op.getOperand(1) == Orig) {
2824 // Copy the bits from the result to the zero bits.
2825 Mask = ResultUsefulBits & OpUsefulBits;
2826 Mask.lshrInPlace(LSB);
2827 }
2828
2829 if (Op.getOperand(0) == Orig)
2830 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2831 }
2832
2833 UsefulBits &= Mask;
2834}
2835
2836static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2837 SDValue Orig, unsigned Depth) {
2838
2839 // Users of this node should have already been instruction selected
2840 // FIXME: Can we turn that into an assert?
2841 if (!UserNode->isMachineOpcode())
2842 return;
2843
2844 switch (UserNode->getMachineOpcode()) {
2845 default:
2846 return;
2847 case AArch64::ANDSWri:
2848 case AArch64::ANDSXri:
2849 case AArch64::ANDWri:
2850 case AArch64::ANDXri:
2851 // We increment Depth only when we call the getUsefulBits
2852 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2853 Depth);
2854 case AArch64::UBFMWri:
2855 case AArch64::UBFMXri:
2856 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2857
2858 case AArch64::ORRWrs:
2859 case AArch64::ORRXrs:
2860 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2861 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2862 Depth);
2863 return;
2864 case AArch64::BFMWri:
2865 case AArch64::BFMXri:
2866 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2867
2868 case AArch64::STRBBui:
2869 case AArch64::STURBBi:
2870 if (UserNode->getOperand(0) != Orig)
2871 return;
2872 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2873 return;
2874
2875 case AArch64::STRHHui:
2876 case AArch64::STURHHi:
2877 if (UserNode->getOperand(0) != Orig)
2878 return;
2879 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2880 return;
2881 }
2882}
2883
2884static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2885 if (Depth >= SelectionDAG::MaxRecursionDepth)
2886 return;
2887 // Initialize UsefulBits
2888 if (!Depth) {
2889 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2890 // At the beginning, assume every produced bits is useful
2891 UsefulBits = APInt(Bitwidth, 0);
2892 UsefulBits.flipAllBits();
2893 }
2894 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2895
2896 for (SDNode *Node : Op.getNode()->uses()) {
2897 // A use cannot produce useful bits
2898 APInt UsefulBitsForUse = APInt(UsefulBits);
2899 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2900 UsersUsefulBits |= UsefulBitsForUse;
2901 }
2902 // UsefulBits contains the produced bits that are meaningful for the
2903 // current definition, thus a user cannot make a bit meaningful at
2904 // this point
2905 UsefulBits &= UsersUsefulBits;
2906}
2907
2908/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2909/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2910/// 0, return Op unchanged.
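/// For example, on i32, getLeftShift(Op, 3) emits UBFMWri Op, #29, #28 (the
/// LSL #3 alias) and getLeftShift(Op, -2) emits UBFMWri Op, #2, #31 (LSR #2).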
2911static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2912 if (ShlAmount == 0)
2913 return Op;
2914
2915 EVT VT = Op.getValueType();
2916 SDLoc dl(Op);
2917 unsigned BitWidth = VT.getSizeInBits();
2918 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2919
2920 SDNode *ShiftNode;
2921 if (ShlAmount > 0) {
2922 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2923 ShiftNode = CurDAG->getMachineNode(
2924 UBFMOpc, dl, VT, Op,
2925 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2926 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2927 } else {
2928 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2929 assert(ShlAmount < 0 && "expected right shift");
2930 int ShrAmount = -ShlAmount;
2931 ShiftNode = CurDAG->getMachineNode(
2932 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2933 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2934 }
2935
2936 return SDValue(ShiftNode, 0);
2937}
2938
2939// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2940 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2941 bool BiggerPattern,
2942 const uint64_t NonZeroBits,
2943 SDValue &Src, int &DstLSB,
2944 int &Width);
2945
2946 // For bit-field-positioning pattern "(shl VAL, N)".
2947 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2948 bool BiggerPattern,
2949 const uint64_t NonZeroBits,
2950 SDValue &Src, int &DstLSB,
2951 int &Width);
2952
2953/// Does this tree qualify as an attempt to move a bitfield into position,
2954/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2955 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2956 bool BiggerPattern, SDValue &Src,
2957 int &DstLSB, int &Width) {
2958 EVT VT = Op.getValueType();
2959 unsigned BitWidth = VT.getSizeInBits();
2960 (void)BitWidth;
2961 assert(BitWidth == 32 || BitWidth == 64);
2962
2963 KnownBits Known = CurDAG->computeKnownBits(Op);
2964
2965 // Non-zero in the sense that they're not provably zero, which is the key
2966 // point if we want to use this value
2967 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2968 if (!isShiftedMask_64(NonZeroBits))
2969 return false;
2970
2971 switch (Op.getOpcode()) {
2972 default:
2973 break;
2974 case ISD::AND:
2975 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2976 NonZeroBits, Src, DstLSB, Width);
2977 case ISD::SHL:
2978 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2979 NonZeroBits, Src, DstLSB, Width);
2980 }
2981
2982 return false;
2983}
2984
2985 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2986 bool BiggerPattern,
2987 const uint64_t NonZeroBits,
2988 SDValue &Src, int &DstLSB,
2989 int &Width) {
2990 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2991
2992 EVT VT = Op.getValueType();
2993 assert((VT == MVT::i32 || VT == MVT::i64) &&
2994 "Caller guarantees VT is one of i32 or i64");
2995 (void)VT;
2996
2997 uint64_t AndImm;
2998 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2999 return false;
3000
3001 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3002 // 1) (AndImm & (1 << POS) == 0)
3003 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3004 //
3005 // 1) and 2) don't agree so something must be wrong (e.g., in
3006 // 'SelectionDAG::computeKnownBits')
3007 assert((~AndImm & NonZeroBits) == 0 &&
3008 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3009
3010 SDValue AndOp0 = Op.getOperand(0);
3011
3012 uint64_t ShlImm;
3013 SDValue ShlOp0;
3014 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3015 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3016 ShlOp0 = AndOp0.getOperand(0);
3017 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3018 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3019 ShlImm)) {
3020 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3021
3022 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3023 SDValue ShlVal = AndOp0.getOperand(0);
3024
3025 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3026 // expect VT to be MVT::i32.
3027 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3028
3029 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3030 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3031 } else
3032 return false;
3033
3034 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3035 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3036 // AndOp0+AND.
3037 if (!BiggerPattern && !AndOp0.hasOneUse())
3038 return false;
3039
3040 DstLSB = llvm::countr_zero(NonZeroBits);
3041 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3042
3043 // Bail out on large Width. This happens when no proper combining / constant
3044 // folding was performed.
3045 if (Width >= (int)VT.getSizeInBits()) {
3046 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3047 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3048 // "val".
3049 // If VT is i32, Width >= 32 means:
3050 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3051 // demands at least 'Width' bits (after dag-combiner). This, together with
3052 // the `any_extend` Op (undefined higher bits), indicates a missed combination
3053 // when lowering the 'and' IR instruction to a machine IR instruction.
3054 LLVM_DEBUG(
3055 dbgs()
3056 << "Found large Width in bit-field-positioning -- this indicates no "
3057 "proper combining / constant folding was performed\n");
3058 return false;
3059 }
3060
3061 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3062 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3063 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3064 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3065 // which case it is not profitable to insert an extra shift.
3066 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3067 return false;
3068
3069 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3070 return true;
3071}
3072
3073 // For the node (shl (and val, mask), N), returns true if the node is
3074 // equivalent to UBFIZ.
3075 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3076 SDValue &Src, int &DstLSB,
3077 int &Width) {
3078 // Caller should have verified that N is a left shift with constant shift
3079 // amount; asserts that.
3080 assert(Op.getOpcode() == ISD::SHL &&
3081 "Op.getNode() should be a SHL node to call this function");
3082 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3083 "Op.getNode() should shift ShlImm to call this function");
3084
3085 uint64_t AndImm = 0;
3086 SDValue Op0 = Op.getOperand(0);
3087 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3088 return false;
3089
3090 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3091 if (isMask_64(ShiftedAndImm)) {
3092 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3093 // should end with Mask, and could be prefixed with random bits if those
3094 // bits are shifted out.
3095 //
3096 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3097 // the bits of the AND result corresponding to those mask bits are shifted
3098 // out, so it's fine not to extract them.
3099 Width = llvm::countr_one(ShiftedAndImm);
3100 DstLSB = ShlImm;
3101 Src = Op0.getOperand(0);
3102 return true;
3103 }
3104 return false;
3105}
3106
3107 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3108 bool BiggerPattern,
3109 const uint64_t NonZeroBits,
3110 SDValue &Src, int &DstLSB,
3111 int &Width) {
3112 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3113
3114 EVT VT = Op.getValueType();
3115 assert((VT == MVT::i32 || VT == MVT::i64) &&
3116 "Caller guarantees that type is i32 or i64");
3117 (void)VT;
3118
3119 uint64_t ShlImm;
3120 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3121 return false;
3122
3123 if (!BiggerPattern && !Op.hasOneUse())
3124 return false;
3125
3126 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3127 return true;
3128
3129 DstLSB = llvm::countr_zero(NonZeroBits);
3130 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3131
3132 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3133 return false;
3134
3135 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3136 return true;
3137}
3138
3139static bool isShiftedMask(uint64_t Mask, EVT VT) {
3140 assert(VT == MVT::i32 || VT == MVT::i64);
3141 if (VT == MVT::i32)
3142 return isShiftedMask_32(Mask);
3143 return isShiftedMask_64(Mask);
3144}
3145
3146// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3147// inserted only sets known zero bits.
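// For example, (or (and X, 0xfffffff8), 0x5) on i32, assuming only the low
// three bits of the AND are known zero: 0x5 is not a valid logical immediate,
// so this selects MOVi32imm #0x5 followed by BFM with ImmR = 0 and ImmS = 2
// (the BFXIL alias), which inserts those three bits into X.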
3148 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3149 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3150
3151 EVT VT = N->getValueType(0);
3152 if (VT != MVT::i32 && VT != MVT::i64)
3153 return false;
3154
3155 unsigned BitWidth = VT.getSizeInBits();
3156
3157 uint64_t OrImm;
3158 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3159 return false;
3160
3161 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3162 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3163 // performance neutral.
3164 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3165 return false;
3166
3167 uint64_t MaskImm;
3168 SDValue And = N->getOperand(0);
3169 // Must be a single use AND with an immediate operand.
3170 if (!And.hasOneUse() ||
3171 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3172 return false;
3173
3174 // Compute the Known Zero for the AND as this allows us to catch more general
3175 // cases than just looking for AND with imm.
3176 KnownBits Known = CurDAG->computeKnownBits(And);
3177
3178 // Non-zero in the sense that they're not provably zero, which is the key
3179 // point if we want to use this value.
3180 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3181
3182 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3183 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3184 return false;
3185
3186 // The bits being inserted must only set those bits that are known to be zero.
3187 if ((OrImm & NotKnownZero) != 0) {
3188 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3189 // currently handle this case.
3190 return false;
3191 }
3192
3193 // BFI/BFXIL dst, src, #lsb, #width.
3194 int LSB = llvm::countr_one(NotKnownZero);
3195 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3196
3197 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3198 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3199 unsigned ImmS = Width - 1;
3200
3201 // If we're creating a BFI instruction avoid cases where we need more
3202 // instructions to materialize the BFI constant as compared to the original
3203 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3204 // should be no worse in this case.
3205 bool IsBFI = LSB != 0;
3206 uint64_t BFIImm = OrImm >> LSB;
3207 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3208 // We have a BFI instruction and we know the constant can't be materialized
3209 // with a ORR-immediate with the zero register.
3210 unsigned OrChunks = 0, BFIChunks = 0;
3211 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3212 if (((OrImm >> Shift) & 0xFFFF) != 0)
3213 ++OrChunks;
3214 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3215 ++BFIChunks;
3216 }
3217 if (BFIChunks > OrChunks)
3218 return false;
3219 }
3220
3221 // Materialize the constant to be inserted.
3222 SDLoc DL(N);
3223 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3224 SDNode *MOVI = CurDAG->getMachineNode(
3225 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3226
3227 // Create the BFI/BFXIL instruction.
3228 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3229 CurDAG->getTargetConstant(ImmR, DL, VT),
3230 CurDAG->getTargetConstant(ImmS, DL, VT)};
3231 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3232 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3233 return true;
3234}
3235
3236 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3237 SDValue &ShiftedOperand,
3238 uint64_t &EncodedShiftImm) {
3239 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3240 if (!Dst.hasOneUse())
3241 return false;
3242
3243 EVT VT = Dst.getValueType();
3244 assert((VT == MVT::i32 || VT == MVT::i64) &&
3245 "Caller should guarantee that VT is one of i32 or i64");
3246 const unsigned SizeInBits = VT.getSizeInBits();
3247
3248 SDLoc DL(Dst.getNode());
3249 uint64_t AndImm, ShlImm;
3250 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3251 isShiftedMask_64(AndImm)) {
3252 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3253 SDValue DstOp0 = Dst.getOperand(0);
3254 if (!DstOp0.hasOneUse())
3255 return false;
3256
3257 // An example to illustrate the transformation
3258 // From:
3259 // lsr x8, x1, #1
3260 // and x8, x8, #0x3f80
3261 // bfxil x8, x1, #0, #7
3262 // To:
3263 // and x8, x23, #0x7f
3264 // ubfx x9, x23, #8, #7
3265 // orr x23, x8, x9, lsl #7
3266 //
3267 // The number of instructions remains the same, but ORR is faster than BFXIL
3268 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3269 // the dependency chain is improved after the transformation.
3270 uint64_t SrlImm;
3271 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3272 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3273 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3274 unsigned MaskWidth =
3275 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3276 unsigned UBFMOpc =
3277 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3278 SDNode *UBFMNode = CurDAG->getMachineNode(
3279 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3280 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3281 VT),
3282 CurDAG->getTargetConstant(
3283 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3284 ShiftedOperand = SDValue(UBFMNode, 0);
3285 EncodedShiftImm = AArch64_AM::getShifterImm(
3286 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3287 return true;
3288 }
3289 }
3290 return false;
3291 }
3292
3293 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3294 ShiftedOperand = Dst.getOperand(0);
3295 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3296 return true;
3297 }
3298
3299 uint64_t SrlImm;
3300 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3301 ShiftedOperand = Dst.getOperand(0);
3302 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3303 return true;
3304 }
3305 return false;
3306}
3307
3308// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3309// the operands and select it to AArch64::ORR with shifted registers if
3310// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3311static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3312 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3313 const bool BiggerPattern) {
3314 EVT VT = N->getValueType(0);
3315 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3316 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3317 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3318 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3319 assert((VT == MVT::i32 || VT == MVT::i64) &&
3320 "Expect result type to be i32 or i64 since N is combinable to BFM");
3321 SDLoc DL(N);
3322
3323 // Bail out if BFM simplifies away one node in BFM Dst.
3324 if (OrOpd1 != Dst)
3325 return false;
3326
3327 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3328 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3329 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3330 if (BiggerPattern) {
3331 uint64_t SrcAndImm;
3332 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3333 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3334 // OrOpd0 = AND Src, #Mask
3335 // So BFM simplifies away one AND node from Src and doesn't simplify away
3336 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3337 // one node (from Rd), ORR is better since it has higher throughput and
3338 // smaller latency than BFM on many AArch64 processors (and for the rest
3339 // ORR is at least as good as BFM).
3340 SDValue ShiftedOperand;
3341 uint64_t EncodedShiftImm;
3342 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3343 EncodedShiftImm)) {
3344 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3345 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3346 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3347 return true;
3348 }
3349 }
3350 return false;
3351 }
3352
3353 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3354
3355 uint64_t ShlImm;
3356 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3357 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3358 SDValue Ops[] = {
3359 Dst, Src,
3360 CurDAG->getTargetConstant(
3361 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3362 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3363 return true;
3364 }
3365
3366 // Select the following pattern to left-shifted operand rather than BFI.
3367 // %val1 = op ..
3368 // %val2 = shl %val1, #imm
3369 // %res = or %val1, %val2
3370 //
3371 // If N is selected to be BFI, we know that
3372 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3373 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3374 //
3375 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
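// For example, "%res = or %val1, (shl %val1, 3)" becomes
// "orr Wd, Wn, Wn, lsl #3" (with Wn holding %val1) instead of a BFI.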
3376 if (OrOpd0.getOperand(0) == OrOpd1) {
3377 SDValue Ops[] = {
3378 OrOpd1, OrOpd1,
3379 CurDAG->getTargetConstant(
3380 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3381 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3382 return true;
3383 }
3384 }
3385
3386 uint64_t SrlImm;
3387 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3388 // Select the following pattern to right-shifted operand rather than BFXIL.
3389 // %val1 = op ..
3390 // %val2 = lshr %val1, #imm
3391 // %res = or %val1, %val2
3392 //
3393 // If N is selected to be BFXIL, we know that
3394 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3395 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3396 //
3397 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3398 if (OrOpd0.getOperand(0) == OrOpd1) {
3399 SDValue Ops[] = {
3400 OrOpd1, OrOpd1,
3401 CurDAG->getTargetConstant(
3402 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3403 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3404 return true;
3405 }
3406 }
3407
3408 return false;
3409}
3410
3411static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3412 SelectionDAG *CurDAG) {
3413 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3414
3415 EVT VT = N->getValueType(0);
3416 if (VT != MVT::i32 && VT != MVT::i64)
3417 return false;
3418
3419 unsigned BitWidth = VT.getSizeInBits();
3420
3421 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3422 // have the expected shape. Try to undo that.
3423
3424 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3425 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3426
3427 // Given an OR operation, check if we have the following pattern
3428 // ubfm c, b, imm, imm2 (or something that does the same job, see
3429 // isBitfieldExtractOp)
3430 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3431 // countTrailingZeros(mask2) == imm2 - imm + 1
3432 // f = d | c
3433 // if yes, replace the OR instruction with:
3434 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
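// For example (i32), f = (e & 0xffffff00) | ((b >> 4) & 0xff) matches with
// ImmR = 4, ImmS = 11 and is selected to BFM Wd, Wn, #4, #11 (the
// "BFXIL Wd, Wn, #4, #8" alias), with Wd holding e and Wn holding b.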
3435
3436 // OR is commutative, check all combinations of operand order and values of
3437 // BiggerPattern, i.e.
3438 // Opd0, Opd1, BiggerPattern=false
3439 // Opd1, Opd0, BiggerPattern=false
3440 // Opd0, Opd1, BiggerPattern=true
3441 // Opd1, Opd0, BiggerPattern=true
3442 // Several of these combinations may match, so check with BiggerPattern=false
3443 // first since that will produce better results by matching more instructions
3444 // and/or inserting fewer extra instructions.
3445 for (int I = 0; I < 4; ++I) {
3446
3447 SDValue Dst, Src;
3448 unsigned ImmR, ImmS;
3449 bool BiggerPattern = I / 2;
3450 SDValue OrOpd0Val = N->getOperand(I % 2);
3451 SDNode *OrOpd0 = OrOpd0Val.getNode();
3452 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3453 SDNode *OrOpd1 = OrOpd1Val.getNode();
3454
3455 unsigned BFXOpc;
3456 int DstLSB, Width;
3457 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3458 NumberOfIgnoredLowBits, BiggerPattern)) {
3459 // Check that the returned opcode is compatible with the pattern,
3460 // i.e., same type and zero extended (U and not S)
3461 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3462 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3463 continue;
3464
3465 // Compute the width of the bitfield insertion
3466 DstLSB = 0;
3467 Width = ImmS - ImmR + 1;
3468 // FIXME: This constraint is to catch bitfield insertion; we may
3469 // want to widen the pattern if we want to grab the general bitfield
3470 // move case.
3471 if (Width <= 0)
3472 continue;
3473
3474 // If the mask on the insertee is correct, we have a BFXIL operation. We
3475 // can share the ImmR and ImmS values from the already-computed UBFM.
3476 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3477 BiggerPattern,
3478 Src, DstLSB, Width)) {
3479 ImmR = (BitWidth - DstLSB) % BitWidth;
3480 ImmS = Width - 1;
3481 } else
3482 continue;
3483
3484 // Check the second part of the pattern
3485 EVT VT = OrOpd1Val.getValueType();
3486 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3487
3488 // Compute the Known Zero for the candidate of the first operand.
3489 // This allows us to catch more general cases than just looking for
3490 // AND with imm. Indeed, simplify-demanded-bits may have removed
3491 // the AND instruction because it proved it was useless.
3492 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3493
3494 // Check if there is enough room for the second operand to appear
3495 // in the first one
3496 APInt BitsToBeInserted =
3497 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3498
3499 if ((BitsToBeInserted & ~Known.Zero) != 0)
3500 continue;
3501
3502 // Set the first operand
3503 uint64_t Imm;
3504 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3505 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3506 // In that case, we can eliminate the AND
3507 Dst = OrOpd1->getOperand(0);
3508 else
3509 // Maybe the AND has been removed by simplify-demanded-bits
3510 // or is useful because it discards more bits
3511 Dst = OrOpd1Val;
3512
3513 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3514 // with shifted operand is more efficient.
3515 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3516 BiggerPattern))
3517 return true;
3518
3519 // both parts match
3520 SDLoc DL(N);
3521 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3522 CurDAG->getTargetConstant(ImmS, DL, VT)};
3523 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3524 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3525 return true;
3526 }
3527
3528 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3529 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3530 // mask (e.g., 0x000ffff0).
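// For example (i32), with Mask1Imm == 0x000ffff0 and Mask0Imm == 0xfff0000f:
// LSB == 4 and Width == 16, so Y is first shifted right by 4 (UBFM) and the
// result is then inserted into X with BFM (ImmR == 28, ImmS == 15),
// replacing bits [19:4] of X.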
3531 uint64_t Mask0Imm, Mask1Imm;
3532 SDValue And0 = N->getOperand(0);
3533 SDValue And1 = N->getOperand(1);
3534 if (And0.hasOneUse() && And1.hasOneUse() &&
3535 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3536 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3537 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3538 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3539
3540 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3541 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3542 // bits to be inserted.
3543 if (isShiftedMask(Mask0Imm, VT)) {
3544 std::swap(And0, And1);
3545 std::swap(Mask0Imm, Mask1Imm);
3546 }
3547
3548 SDValue Src = And1->getOperand(0);
3549 SDValue Dst = And0->getOperand(0);
3550 unsigned LSB = llvm::countr_zero(Mask1Imm);
3551 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3552
3553 // The BFXIL inserts the low-order bits from a source register, so right
3554 // shift the needed bits into place.
3555 SDLoc DL(N);
3556 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3557 uint64_t LsrImm = LSB;
3558 if (Src->hasOneUse() &&
3559 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3560 (LsrImm + LSB) < BitWidth) {
3561 Src = Src->getOperand(0);
3562 LsrImm += LSB;
3563 }
3564
3565 SDNode *LSR = CurDAG->getMachineNode(
3566 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3567 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3568
3569 // BFXIL is an alias of BFM, so translate to BFM operands.
3570 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3571 unsigned ImmS = Width - 1;
3572
3573 // Create the BFXIL instruction.
3574 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3575 CurDAG->getTargetConstant(ImmR, DL, VT),
3576 CurDAG->getTargetConstant(ImmS, DL, VT)};
3577 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3578 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3579 return true;
3580 }
3581
3582 return false;
3583}
3584
3585bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3586 if (N->getOpcode() != ISD::OR)
3587 return false;
3588
3589 APInt NUsefulBits;
3590 getUsefulBits(SDValue(N, 0), NUsefulBits);
3591
3592 // If no bits are useful, just return UNDEF.
3593 if (!NUsefulBits) {
3594 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3595 return true;
3596 }
3597
3598 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3599 return true;
3600
3601 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3602}
3603
3604 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3605/// equivalent of a left shift by a constant amount followed by an and masking
3606/// out a contiguous set of bits.
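/// For example (i32), (and (shl x, 3), 0x1f8) gives DstLSB == 3 and Width == 6,
/// i.e. UBFM Wd, Wn, #29, #5, which is the "UBFIZ Wd, Wn, #3, #6" alias.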
3607bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3608 if (N->getOpcode() != ISD::AND)
3609 return false;
3610
3611 EVT VT = N->getValueType(0);
3612 if (VT != MVT::i32 && VT != MVT::i64)
3613 return false;
3614
3615 SDValue Op0;
3616 int DstLSB, Width;
3617 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3618 Op0, DstLSB, Width))
3619 return false;
3620
3621 // ImmR is the rotate right amount.
3622 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3623 // ImmS is the most significant bit of the source to be moved.
3624 unsigned ImmS = Width - 1;
3625
3626 SDLoc DL(N);
3627 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3628 CurDAG->getTargetConstant(ImmS, DL, VT)};
3629 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3630 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3631 return true;
3632}
3633
3634/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3635/// variable shift/rotate instructions.
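/// For example, a 64-bit variable shift only uses the low 6 bits of its
/// amount, so (srl x, (add y, 64)) can be selected as "lsrv Xd, Xn, Xm" with
/// the ADD dropped, since adding 64 does not change the amount modulo 64.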
3636bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3637 EVT VT = N->getValueType(0);
3638
3639 unsigned Opc;
3640 switch (N->getOpcode()) {
3641 case ISD::ROTR:
3642 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3643 break;
3644 case ISD::SHL:
3645 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3646 break;
3647 case ISD::SRL:
3648 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3649 break;
3650 case ISD::SRA:
3651 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3652 break;
3653 default:
3654 return false;
3655 }
3656
3657 uint64_t Size;
3658 uint64_t Bits;
3659 if (VT == MVT::i32) {
3660 Bits = 5;
3661 Size = 32;
3662 } else if (VT == MVT::i64) {
3663 Bits = 6;
3664 Size = 64;
3665 } else
3666 return false;
3667
3668 SDValue ShiftAmt = N->getOperand(1);
3669 SDLoc DL(N);
3670 SDValue NewShiftAmt;
3671
3672 // Skip over an extend of the shift amount.
3673 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3674 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3675 ShiftAmt = ShiftAmt->getOperand(0);
3676
3677 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3678 SDValue Add0 = ShiftAmt->getOperand(0);
3679 SDValue Add1 = ShiftAmt->getOperand(1);
3680 uint64_t Add0Imm;
3681 uint64_t Add1Imm;
3682 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3683 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3684 // to avoid the ADD/SUB.
3685 NewShiftAmt = Add0;
3686 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3687 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3688 (Add0Imm % Size == 0)) {
3689 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3690 // to generate a NEG instead of a SUB from a constant.
3691 unsigned NegOpc;
3692 unsigned ZeroReg;
3693 EVT SubVT = ShiftAmt->getValueType(0);
3694 if (SubVT == MVT::i32) {
3695 NegOpc = AArch64::SUBWrr;
3696 ZeroReg = AArch64::WZR;
3697 } else {
3698 assert(SubVT == MVT::i64);
3699 NegOpc = AArch64::SUBXrr;
3700 ZeroReg = AArch64::XZR;
3701 }
3702 SDValue Zero =
3703 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3704 MachineSDNode *Neg =
3705 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3706 NewShiftAmt = SDValue(Neg, 0);
3707 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3708 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3709 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3710 // to generate a NOT instead of a SUB from a constant.
3711 unsigned NotOpc;
3712 unsigned ZeroReg;
3713 EVT SubVT = ShiftAmt->getValueType(0);
3714 if (SubVT == MVT::i32) {
3715 NotOpc = AArch64::ORNWrr;
3716 ZeroReg = AArch64::WZR;
3717 } else {
3718 assert(SubVT == MVT::i64);
3719 NotOpc = AArch64::ORNXrr;
3720 ZeroReg = AArch64::XZR;
3721 }
3722 SDValue Zero =
3723 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3724 MachineSDNode *Not =
3725 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3726 NewShiftAmt = SDValue(Not, 0);
3727 } else
3728 return false;
3729 } else {
3730 // If the shift amount is masked with an AND, check that the mask covers the
3731 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3732 // the AND.
3733 uint64_t MaskImm;
3734 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3735 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3736 return false;
3737
3738 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3739 return false;
3740
3741 NewShiftAmt = ShiftAmt->getOperand(0);
3742 }
3743
3744 // Narrow/widen the shift amount to match the size of the shift operation.
3745 if (VT == MVT::i32)
3746 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3747 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3748 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3749 MachineSDNode *Ext = CurDAG->getMachineNode(
3750 AArch64::SUBREG_TO_REG, DL, VT,
3751 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3752 NewShiftAmt = SDValue(Ext, 0);
3753 }
3754
3755 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3756 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3757 return true;
3758}
3759
3760 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3761 SDValue &FixedPos,
3762 unsigned RegWidth,
3763 bool isReciprocal) {
3764 APFloat FVal(0.0);
3765 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3766 FVal = CN->getValueAPF();
3767 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3768 // Some otherwise illegal constants are allowed in this case.
3769 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3770 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3771 return false;
3772
3773 ConstantPoolSDNode *CN =
3774 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3775 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3776 } else
3777 return false;
3778
3779 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3780 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3781 // x-register.
3782 //
3783 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3784 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3785 // integers.
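// For example, for (fp_to_sint (fmul Val, 16.0)) with an i32 result, IntVal
// becomes 16 and FBits == 4, i.e. a fixed-point conversion with 4 fractional
// bits (e.g. an FCVTZS with #4).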
3786 bool IsExact;
3787
3788 if (isReciprocal)
3789 if (!FVal.getExactInverse(&FVal))
3790 return false;
3791
3792 // fbits is between 1 and 64 in the worst-case, which means the fmul
3793 // could have 2^64 as an actual operand. Need 65 bits of precision.
3794 APSInt IntVal(65, true);
3795 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3796
3797 // N.b. isPowerOf2 also checks for > 0.
3798 if (!IsExact || !IntVal.isPowerOf2())
3799 return false;
3800 unsigned FBits = IntVal.logBase2();
3801
3802 // Checks above should have guaranteed that we haven't lost information in
3803 // finding FBits, but it must still be in range.
3804 if (FBits == 0 || FBits > RegWidth) return false;
3805
3806 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3807 return true;
3808}
3809
3810bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3811 unsigned RegWidth) {
3812 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3813 false);
3814}
3815
3816bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3817 SDValue &FixedPos,
3818 unsigned RegWidth) {
3819 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3820 true);
3821}
3822
3823 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3824 // fields of the string, obtains the integer values from them, and combines
3825 // these into a single value to be used in the MRS/MSR instruction.
3826 static int getIntOperandFromRegisterString(StringRef RegString) {
3827 SmallVector<StringRef, 5> Fields;
3828 RegString.split(Fields, ':');
3829
3830 if (Fields.size() == 1)
3831 return -1;
3832
3833 assert(Fields.size() == 5
3834 && "Invalid number of fields in read register string");
3835
3836 SmallVector<int, 5> Ops;
3837 bool AllIntFields = true;
3838
3839 for (StringRef Field : Fields) {
3840 unsigned IntField;
3841 AllIntFields &= !Field.getAsInteger(10, IntField);
3842 Ops.push_back(IntField);
3843 }
3844
3845 assert(AllIntFields &&
3846 "Unexpected non-integer value in special register string.");
3847 (void)AllIntFields;
3848
3849 // Need to combine the integer fields of the string into a single value
3850 // based on the bit encoding of MRS/MSR instruction.
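// For example, the string "1:2:7:4:5" yields
// (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 == 21413.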
3851 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3852 (Ops[3] << 3) | (Ops[4]);
3853}
3854
3855// Lower the read_register intrinsic to an MRS instruction node if the special
3856 // register string argument is either of the form detailed in the ACLE (the
3857 // form described in getIntOperandFromRegisterString) or is a named register
3858// known by the MRS SysReg mapper.
3859bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3860 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3861 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3862 SDLoc DL(N);
3863
3864 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3865
3866 unsigned Opcode64Bit = AArch64::MRS;
3867 int Imm = getIntOperandFromRegisterString(RegString->getString());
3868 if (Imm == -1) {
3869 // No match; use the sysreg mapper to map the remaining possible strings to
3870 // the value for the register to be used for the instruction operand.
3871 const auto *TheReg =
3872 AArch64SysReg::lookupSysRegByName(RegString->getString());
3873 if (TheReg && TheReg->Readable &&
3874 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3875 Imm = TheReg->Encoding;
3876 else
3877 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3878
3879 if (Imm == -1) {
3880 // Still no match, see if this is "pc" or give up.
3881 if (!ReadIs128Bit && RegString->getString() == "pc") {
3882 Opcode64Bit = AArch64::ADR;
3883 Imm = 0;
3884 } else {
3885 return false;
3886 }
3887 }
3888 }
3889
3890 SDValue InChain = N->getOperand(0);
3891 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3892 if (!ReadIs128Bit) {
3893 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3894 {SysRegImm, InChain});
3895 } else {
3896 SDNode *MRRS = CurDAG->getMachineNode(
3897 AArch64::MRRS, DL,
3898 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3899 {SysRegImm, InChain});
3900
3901 // Sysregs are not endian. The even register always contains the low half
3902 // of the register.
3903 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3904 SDValue(MRRS, 0));
3905 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3906 SDValue(MRRS, 0));
3907 SDValue OutChain = SDValue(MRRS, 1);
3908
3909 ReplaceUses(SDValue(N, 0), Lo);
3910 ReplaceUses(SDValue(N, 1), Hi);
3911 ReplaceUses(SDValue(N, 2), OutChain);
3912 };
3913 return true;
3914}
3915
3916// Lower the write_register intrinsic to an MSR instruction node if the special
3917 // register string argument is either of the form detailed in the ACLE (the
3918 // form described in getIntOperandFromRegisterString) or is a named register
3919// known by the MSR SysReg mapper.
3920bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3921 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3922 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3923 SDLoc DL(N);
3924
3925 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3926
3927 if (!WriteIs128Bit) {
3928 // Check if the register was one of those allowed as the pstatefield value
3929 // in the MSR (immediate) instruction. To accept the values allowed in the
3930 // pstatefield for the MSR (immediate) instruction, we also require that an
3931 // immediate value has been provided as an argument; we know that this is
3932 // the case, as it has been ensured by semantic checking.
3933 auto trySelectPState = [&](auto PMapper, unsigned State) {
3934 if (PMapper) {
3935 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3936 "Expected a constant integer expression.");
3937 unsigned Reg = PMapper->Encoding;
3938 uint64_t Immed = N->getConstantOperandVal(2);
3939 CurDAG->SelectNodeTo(
3940 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3941 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3942 return true;
3943 }
3944 return false;
3945 };
3946
3947 if (trySelectPState(
3948 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3949 AArch64::MSRpstateImm4))
3950 return true;
3951 if (trySelectPState(
3952 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3953 AArch64::MSRpstateImm1))
3954 return true;
3955 }
3956
3957 int Imm = getIntOperandFromRegisterString(RegString->getString());
3958 if (Imm == -1) {
3959 // Use the sysreg mapper to attempt to map the remaining possible strings
3960 // to the value for the register to be used for the MSR (register)
3961 // instruction operand.
3962 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3963 if (TheReg && TheReg->Writeable &&
3964 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3965 Imm = TheReg->Encoding;
3966 else
3967 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3968
3969 if (Imm == -1)
3970 return false;
3971 }
3972
3973 SDValue InChain = N->getOperand(0);
3974 if (!WriteIs128Bit) {
3975 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3976 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3977 N->getOperand(2), InChain);
3978 } else {
3979 // No endian swap. The lower half always goes into the even subreg, and the
3980 // higher half always into the odd subreg.
3981 SDNode *Pair = CurDAG->getMachineNode(
3982 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3983 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3984 MVT::i32),
3985 N->getOperand(2),
3986 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3987 N->getOperand(3),
3988 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3989
3990 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3991 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3992 SDValue(Pair, 0), InChain);
3993 }
3994
3995 return true;
3996}
3997
3998/// We've got special pseudo-instructions for these
3999bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4000 unsigned Opcode;
4001 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4002
4003 // Leave IR for LSE if subtarget supports it.
4004 if (Subtarget->hasLSE()) return false;
4005
4006 if (MemTy == MVT::i8)
4007 Opcode = AArch64::CMP_SWAP_8;
4008 else if (MemTy == MVT::i16)
4009 Opcode = AArch64::CMP_SWAP_16;
4010 else if (MemTy == MVT::i32)
4011 Opcode = AArch64::CMP_SWAP_32;
4012 else if (MemTy == MVT::i64)
4013 Opcode = AArch64::CMP_SWAP_64;
4014 else
4015 llvm_unreachable("Unknown AtomicCmpSwap type");
4016
4017 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4018 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4019 N->getOperand(0)};
4020 SDNode *CmpSwap = CurDAG->getMachineNode(
4021 Opcode, SDLoc(N),
4022 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4023
4024 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4025 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4026
4027 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4028 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4029 CurDAG->RemoveDeadNode(N);
4030
4031 return true;
4032}
4033
4034bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4035 SDValue &Shift) {
4036 if (!isa<ConstantSDNode>(N))
4037 return false;
4038
4039 SDLoc DL(N);
4040 uint64_t Val = cast<ConstantSDNode>(N)
4041 ->getAPIntValue()
4042 .trunc(VT.getFixedSizeInBits())
4043 .getZExtValue();
4044
4045 switch (VT.SimpleTy) {
4046 case MVT::i8:
4047 // All immediates are supported.
4048 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4049 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4050 return true;
4051 case MVT::i16:
4052 case MVT::i32:
4053 case MVT::i64:
4054 // Support 8bit unsigned immediates.
4055 if (Val <= 255) {
4056 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4057 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4058 return true;
4059 }
4060 // Support 16bit unsigned immediates that are a multiple of 256.
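// For example, 0x3400 (13312) is encoded as immediate 0x34 with a shift of 8.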
4061 if (Val <= 65280 && Val % 256 == 0) {
4062 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4063 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4064 return true;
4065 }
4066 break;
4067 default:
4068 break;
4069 }
4070
4071 return false;
4072}
4073
4074bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4075 SDValue &Imm, SDValue &Shift,
4076 bool Negate) {
4077 if (!isa<ConstantSDNode>(N))
4078 return false;
4079
4080 SDLoc DL(N);
4081 int64_t Val = cast<ConstantSDNode>(N)
4082 ->getAPIntValue()
4083 .trunc(VT.getFixedSizeInBits())
4084 .getSExtValue();
4085
4086 if (Negate)
4087 Val = -Val;
4088
4089 // Signed saturating instructions treat their immediate operand as unsigned,
4090 // whereas the related intrinsics define their operands to be signed. This
4091 // means we can only use the immediate form when the operand is non-negative.
4092 if (Val < 0)
4093 return false;
4094
4095 switch (VT.SimpleTy) {
4096 case MVT::i8:
4097 // All positive immediates are supported.
4098 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4099 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4100 return true;
4101 case MVT::i16:
4102 case MVT::i32:
4103 case MVT::i64:
4104 // Support 8bit positive immediates.
4105 if (Val <= 255) {
4106 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4107 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4108 return true;
4109 }
4110 // Support 16bit positive immediates that are a multiple of 256.
4111 if (Val <= 65280 && Val % 256 == 0) {
4112 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4113 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4114 return true;
4115 }
4116 break;
4117 default:
4118 break;
4119 }
4120
4121 return false;
4122}
4123
4124bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4125 SDValue &Shift) {
4126 if (!isa<ConstantSDNode>(N))
4127 return false;
4128
4129 SDLoc DL(N);
4130 int64_t Val = cast<ConstantSDNode>(N)
4131 ->getAPIntValue()
4132 .trunc(VT.getFixedSizeInBits())
4133 .getSExtValue();
4134
4135 switch (VT.SimpleTy) {
4136 case MVT::i8:
4137 // All immediates are supported.
4138 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4139 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4140 return true;
4141 case MVT::i16:
4142 case MVT::i32:
4143 case MVT::i64:
4144 // Support 8bit signed immediates.
4145 if (Val >= -128 && Val <= 127) {
4146 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4147 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4148 return true;
4149 }
4150 // Support 16bit signed immediates that are a multiple of 256.
4151 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4152 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4153 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4154 return true;
4155 }
4156 break;
4157 default:
4158 break;
4159 }
4160
4161 return false;
4162}
4163
4164bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4165 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4166 int64_t ImmVal = CNode->getSExtValue();
4167 SDLoc DL(N);
4168 if (ImmVal >= -128 && ImmVal < 128) {
4169 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4170 return true;
4171 }
4172 }
4173 return false;
4174}
4175
4176bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4177 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4178 uint64_t ImmVal = CNode->getZExtValue();
4179
4180 switch (VT.SimpleTy) {
4181 case MVT::i8:
4182 ImmVal &= 0xFF;
4183 break;
4184 case MVT::i16:
4185 ImmVal &= 0xFFFF;
4186 break;
4187 case MVT::i32:
4188 ImmVal &= 0xFFFFFFFF;
4189 break;
4190 case MVT::i64:
4191 break;
4192 default:
4193 llvm_unreachable("Unexpected type");
4194 }
4195
4196 if (ImmVal < 256) {
4197 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4198 return true;
4199 }
4200 }
4201 return false;
4202}
4203
4204bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4205 bool Invert) {
4206 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4207 uint64_t ImmVal = CNode->getZExtValue();
4208 SDLoc DL(N);
4209
4210 if (Invert)
4211 ImmVal = ~ImmVal;
4212
4213 // Shift mask depending on type size.
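// For example, an i8 immediate of 0x3c is replicated to 0x3c3c3c3c3c3c3c3c
// before being encoded as a 64-bit logical immediate.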
4214 switch (VT.SimpleTy) {
4215 case MVT::i8:
4216 ImmVal &= 0xFF;
4217 ImmVal |= ImmVal << 8;
4218 ImmVal |= ImmVal << 16;
4219 ImmVal |= ImmVal << 32;
4220 break;
4221 case MVT::i16:
4222 ImmVal &= 0xFFFF;
4223 ImmVal |= ImmVal << 16;
4224 ImmVal |= ImmVal << 32;
4225 break;
4226 case MVT::i32:
4227 ImmVal &= 0xFFFFFFFF;
4228 ImmVal |= ImmVal << 32;
4229 break;
4230 case MVT::i64:
4231 break;
4232 default:
4233 llvm_unreachable("Unexpected type");
4234 }
4235
4236 uint64_t encoding;
4237 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4238 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4239 return true;
4240 }
4241 }
4242 return false;
4243}
4244
4245// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4246// Rather than attempt to normalise everything we can sometimes saturate the
4247// shift amount during selection. This function also allows for consistent
4248// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4249// required by the instructions.
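// For example, with High == 7 and AllowSaturation == true, a constant shift
// amount of 200 is clamped to 7; with AllowSaturation == false it is rejected.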
4250bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4251 uint64_t High, bool AllowSaturation,
4252 SDValue &Imm) {
4253 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4254 uint64_t ImmVal = CN->getZExtValue();
4255
4256 // Reject shift amounts that are too small.
4257 if (ImmVal < Low)
4258 return false;
4259
4260 // Reject or saturate shift amounts that are too big.
4261 if (ImmVal > High) {
4262 if (!AllowSaturation)
4263 return false;
4264 ImmVal = High;
4265 }
4266
4267 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4268 return true;
4269 }
4270
4271 return false;
4272}
4273
4274bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4275 // tagp(FrameIndex, IRGstack, tag_offset):
4276 // since the offset between FrameIndex and IRGstack is a compile-time
4277 // constant, this can be lowered to a single ADDG instruction.
4278 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4279 return false;
4280 }
4281
4282 SDValue IRG_SP = N->getOperand(2);
4283 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4284 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4285 return false;
4286 }
4287
4288 const TargetLowering *TLI = getTargetLowering();
4289 SDLoc DL(N);
4290 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4291 SDValue FiOp = CurDAG->getTargetFrameIndex(
4292 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4293 int TagOffset = N->getConstantOperandVal(3);
4294
4295 SDNode *Out = CurDAG->getMachineNode(
4296 AArch64::TAGPstack, DL, MVT::i64,
4297 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4298 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4299 ReplaceNode(N, Out);
4300 return true;
4301}
4302
4303void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4304 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4305 "llvm.aarch64.tagp third argument must be an immediate");
4306 if (trySelectStackSlotTagP(N))
4307 return;
4308 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4309 // compile-time constant, not just for stack allocations.
4310
4311 // General case for unrelated pointers in Op1 and Op2.
4312 SDLoc DL(N);
4313 int TagOffset = N->getConstantOperandVal(3);
4314 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4315 {N->getOperand(1), N->getOperand(2)});
4316 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4317 {SDValue(N1, 0), N->getOperand(2)});
4318 SDNode *N3 = CurDAG->getMachineNode(
4319 AArch64::ADDG, DL, MVT::i64,
4320 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4321 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4322 ReplaceNode(N, N3);
4323}
4324
4325bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4326 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4327
4328 // Bail when not a "cast" like insert_subvector.
4329 if (N->getConstantOperandVal(2) != 0)
4330 return false;
4331 if (!N->getOperand(0).isUndef())
4332 return false;
4333
4334 // Bail when normal isel should do the job.
4335 EVT VT = N->getValueType(0);
4336 EVT InVT = N->getOperand(1).getValueType();
4337 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4338 return false;
4339 if (InVT.getSizeInBits() <= 128)
4340 return false;
4341
4342 // NOTE: We can only get here when doing fixed length SVE code generation.
4343 // We do manual selection because the types involved are not linked to real
4344 // registers (despite being legal) and must be coerced into SVE registers.
4345
4347 "Expected to insert into a packed scalable vector!");
4348
4349 SDLoc DL(N);
4350 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4351 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4352 N->getOperand(1), RC));
4353 return true;
4354}
4355
4356bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4357 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4358
4359 // Bail when not a "cast" like extract_subvector.
4360 if (N->getConstantOperandVal(1) != 0)
4361 return false;
4362
4363 // Bail when normal isel can do the job.
4364 EVT VT = N->getValueType(0);
4365 EVT InVT = N->getOperand(0).getValueType();
4366 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4367 return false;
4368 if (VT.getSizeInBits() <= 128)
4369 return false;
4370
4371 // NOTE: We can only get here when doing fixed length SVE code generation.
4372 // We do manual selection because the types involved are not linked to real
4373 // registers (despite being legal) and must be coerced into SVE registers.
4374
4376 "Expected to extract from a packed scalable vector!");
4377
4378 SDLoc DL(N);
4379 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4380 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4381 N->getOperand(0), RC));
4382 return true;
4383}
4384
4385bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4386 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4387
4388 SDValue N0 = N->getOperand(0);
4389 SDValue N1 = N->getOperand(1);
4390 EVT VT = N->getValueType(0);
4391
4392 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4393 // Rotate by a constant is a funnel shift in IR, which is expanded to
4394 // an OR with shifted operands.
4395 // We do the following transform:
4396 // OR N0, N1 -> xar (x, y, imm)
4397 // Where:
4398 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4399 // N0 = SHL_PRED true, V, splat(bits-imm)
4400 // V = (xor x, y)
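// For example, on nxv8i16, rotr((xor x, y), 3) appears as
// or (shl_pred V, 13), (srl_pred V, 3) with V = (xor x, y), and is selected
// to XAR_ZZZI_H with immediate 3.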
4401 if (VT.isScalableVector() &&
4402 (Subtarget->hasSVE2() ||
4403 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4404 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4405 N1.getOpcode() != AArch64ISD::SRL_PRED)
4406 std::swap(N0, N1);
4407 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4408 N1.getOpcode() != AArch64ISD::SRL_PRED)
4409 return false;
4410
4411 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4412 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4413 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4414 return false;
4415
4416 SDValue XOR = N0.getOperand(1);
4417 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4418 return false;
4419
4420 APInt ShlAmt, ShrAmt;
4421 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4422 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4423 return false;
4424
4425 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4426 return false;
4427
4428 SDLoc DL(N);
4429 SDValue Imm =
4430 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4431
4432 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4433 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4434 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4435 AArch64::XAR_ZZZI_D})) {
4436 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4437 return true;
4438 }
4439 return false;
4440 }
4441
4442 if (!Subtarget->hasSHA3())
4443 return false;
4444
4445 if (N0->getOpcode() != AArch64ISD::VSHL ||
4446 N1->getOpcode() != AArch64ISD::VLSHR)
4447 return false;
4448
4449 if (N0->getOperand(0) != N1->getOperand(0) ||
4450 N1->getOperand(0)->getOpcode() != ISD::XOR)
4451 return false;
4452
4453 SDValue XOR = N0.getOperand(0);
4454 SDValue R1 = XOR.getOperand(0);
4455 SDValue R2 = XOR.getOperand(1);
4456
4457 unsigned HsAmt = N0.getConstantOperandVal(1);
4458 unsigned ShAmt = N1.getConstantOperandVal(1);
4459
4460 SDLoc DL = SDLoc(N0.getOperand(1));
4461 SDValue Imm = CurDAG->getTargetConstant(
4462 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4463
4464 if (ShAmt + HsAmt != 64)
4465 return false;
4466
4467 SDValue Ops[] = {R1, R2, Imm};
4468 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4469
4470 return true;
4471}
4472
4473void AArch64DAGToDAGISel::Select(SDNode *Node) {
4474 // If we have a custom node, we already have selected!
4475 if (Node->isMachineOpcode()) {
4476 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4477 Node->setNodeId(-1);
4478 return;
4479 }
4480
4481 // A few custom selection cases.
4482 EVT VT = Node->getValueType(0);
4483
4484 switch (Node->getOpcode()) {
4485 default:
4486 break;
4487
4488 case ISD::ATOMIC_CMP_SWAP:
4489 if (SelectCMP_SWAP(Node))
4490 return;
4491 break;
4492
4493 case ISD::READ_REGISTER:
4494 case AArch64ISD::MRRS:
4495 if (tryReadRegister(Node))
4496 return;
4497 break;
4498
4499 case ISD::WRITE_REGISTER:
4500 case AArch64ISD::MSRR:
4501 if (tryWriteRegister(Node))
4502 return;
4503 break;
4504
4505 case ISD::LOAD: {
4506 // Try to select as an indexed load. Fall through to normal processing
4507 // if we can't.
4508 if (tryIndexedLoad(Node))
4509 return;
4510 break;
4511 }
4512
4513 case ISD::SRL:
4514 case ISD::AND:
4515 case ISD::SRA:
4516 case ISD::SIGN_EXTEND_INREG:
4517 if (tryBitfieldExtractOp(Node))
4518 return;
4519 if (tryBitfieldInsertInZeroOp(Node))
4520 return;
4521 [[fallthrough]];
4522 case ISD::ROTR:
4523 case ISD::SHL:
4524 if (tryShiftAmountMod(Node))
4525 return;
4526 break;
4527
4528 case ISD::SIGN_EXTEND:
4529 if (tryBitfieldExtractOpFromSExt(Node))
4530 return;
4531 break;
4532
4533 case ISD::OR:
4534 if (tryBitfieldInsertOp(Node))
4535 return;
4536 if (trySelectXAR(Node))
4537 return;
4538 break;
4539
4540 case ISD::EXTRACT_SUBVECTOR: {
4541 if (trySelectCastScalableToFixedLengthVector(Node))
4542 return;
4543 break;
4544 }
4545
4546 case ISD::INSERT_SUBVECTOR: {
4547 if (trySelectCastFixedLengthToScalableVector(Node))
4548 return;
4549 break;
4550 }
4551
4552 case ISD::Constant: {
4553 // Materialize zero constants as copies from WZR/XZR. This allows
4554 // the coalescer to propagate these into other instructions.
4555 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4556 if (ConstNode->isZero()) {
4557 if (VT == MVT::i32) {
4558 SDValue New = CurDAG->getCopyFromReg(
4559 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4560 ReplaceNode(Node, New.getNode());
4561 return;
4562 } else if (VT == MVT::i64) {
4563 SDValue New = CurDAG->getCopyFromReg(
4564 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4565 ReplaceNode(Node, New.getNode());
4566 return;
4567 }
4568 }
4569 break;
4570 }
4571
4572 case ISD::FrameIndex: {
4573 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4574 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4575 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4576 const TargetLowering *TLI = getTargetLowering();
4577 SDValue TFI = CurDAG->getTargetFrameIndex(
4578 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4579 SDLoc DL(Node);
4580 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4581 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4582 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4583 return;
4584 }
4585 case ISD::INTRINSIC_W_CHAIN: {
4586 unsigned IntNo = Node->getConstantOperandVal(1);
4587 switch (IntNo) {
4588 default:
4589 break;
4590 case Intrinsic::aarch64_gcsss: {
4591 SDLoc DL(Node);
4592 SDValue Chain = Node->getOperand(0);
4593 SDValue Val = Node->getOperand(2);
4594 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4595 SDNode *SS1 =
4596 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4597 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4598 MVT::Other, Zero, SDValue(SS1, 0));
4599 ReplaceNode(Node, SS2);
4600 return;
4601 }
4602 case Intrinsic::aarch64_ldaxp:
4603 case Intrinsic::aarch64_ldxp: {
4604 unsigned Op =
4605 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4606 SDValue MemAddr = Node->getOperand(2);
4607 SDLoc DL(Node);
4608 SDValue Chain = Node->getOperand(0);
4609
4610 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4611 MVT::Other, MemAddr, Chain);
4612
4613 // Transfer memoperands.
4614 MachineMemOperand *MemOp =
4615 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4616 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4617 ReplaceNode(Node, Ld);
4618 return;
4619 }
4620 case Intrinsic::aarch64_stlxp:
4621 case Intrinsic::aarch64_stxp: {
4622 unsigned Op =
4623 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4624 SDLoc DL(Node);
4625 SDValue Chain = Node->getOperand(0);
4626 SDValue ValLo = Node->getOperand(2);
4627 SDValue ValHi = Node->getOperand(3);
4628 SDValue MemAddr = Node->getOperand(4);
4629
4630 // Place arguments in the right order.
4631 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4632
4633 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4634 // Transfer memoperands.
4635 MachineMemOperand *MemOp =
4636 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4637 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4638
4639 ReplaceNode(Node, St);
4640 return;
4641 }
4642 case Intrinsic::aarch64_neon_ld1x2:
4643 if (VT == MVT::v8i8) {
4644 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4645 return;
4646 } else if (VT == MVT::v16i8) {
4647 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4648 return;
4649 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4650 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4651 return;
4652 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4653 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4654 return;
4655 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4656 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4657 return;
4658 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4659 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4660 return;
4661 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4662 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4663 return;
4664 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4665 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4666 return;
4667 }
4668 break;
4669 case Intrinsic::aarch64_neon_ld1x3:
4670 if (VT == MVT::v8i8) {
4671 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4672 return;
4673 } else if (VT == MVT::v16i8) {
4674 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4675 return;
4676 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4677 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4678 return;
4679 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4680 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4681 return;
4682 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4683 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4684 return;
4685 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4686 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4687 return;
4688 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4689 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4690 return;
4691 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4692 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4693 return;
4694 }
4695 break;
4696 case Intrinsic::aarch64_neon_ld1x4:
4697 if (VT == MVT::v8i8) {
4698 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4699 return;
4700 } else if (VT == MVT::v16i8) {
4701 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4702 return;
4703 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4704 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4705 return;
4706 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4707 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4708 return;
4709 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4710 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4711 return;
4712 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4713 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4714 return;
4715 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4716 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4717 return;
4718 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4719 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4720 return;
4721 }
4722 break;
4723 case Intrinsic::aarch64_neon_ld2:
4724 if (VT == MVT::v8i8) {
4725 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4726 return;
4727 } else if (VT == MVT::v16i8) {
4728 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4729 return;
4730 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4731 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4732 return;
4733 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4734 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4735 return;
4736 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4737 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4738 return;
4739 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4740 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4741 return;
4742 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4743 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4744 return;
4745 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4746 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4747 return;
4748 }
4749 break;
4750 case Intrinsic::aarch64_neon_ld3:
4751 if (VT == MVT::v8i8) {
4752 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4753 return;
4754 } else if (VT == MVT::v16i8) {
4755 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4756 return;
4757 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4758 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4759 return;
4760 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4761 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4762 return;
4763 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4764 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4765 return;
4766 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4767 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4768 return;
4769 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4770 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4771 return;
4772 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4773 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4774 return;
4775 }
4776 break;
4777 case Intrinsic::aarch64_neon_ld4:
4778 if (VT == MVT::v8i8) {
4779 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4780 return;
4781 } else if (VT == MVT::v16i8) {
4782 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4783 return;
4784 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4785 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4786 return;
4787 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4788 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4789 return;
4790 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4791 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4792 return;
4793 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4794 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4795 return;
4796 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4797 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4798 return;
4799 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4800 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4801 return;
4802 }
4803 break;
4804 case Intrinsic::aarch64_neon_ld2r:
4805 if (VT == MVT::v8i8) {
4806 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4807 return;
4808 } else if (VT == MVT::v16i8) {
4809 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4810 return;
4811 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4812 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4813 return;
4814 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4815 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4816 return;
4817 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4818 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4819 return;
4820 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4821 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4822 return;
4823 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4824 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4825 return;
4826 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4827 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4828 return;
4829 }
4830 break;
4831 case Intrinsic::aarch64_neon_ld3r:
4832 if (VT == MVT::v8i8) {
4833 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4834 return;
4835 } else if (VT == MVT::v16i8) {
4836 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4837 return;
4838 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4839 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4840 return;
4841 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4842 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4843 return;
4844 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4845 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4846 return;
4847 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4848 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4849 return;
4850 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4851 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4852 return;
4853 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4854 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4855 return;
4856 }
4857 break;
4858 case Intrinsic::aarch64_neon_ld4r:
4859 if (VT == MVT::v8i8) {
4860 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4861 return;
4862 } else if (VT == MVT::v16i8) {
4863 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4864 return;
4865 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4866 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4867 return;
4868 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4869 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4870 return;
4871 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4872 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4873 return;
4874 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4875 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4876 return;
4877 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4878 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4879 return;
4880 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4881 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4882 return;
4883 }
4884 break;
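// For the ldNlane intrinsics only the element size matters: the same LDni
// instruction covers both the 64-bit and 128-bit vector types, with the lane
// index operand selecting the element to load.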
4885 case Intrinsic::aarch64_neon_ld2lane:
4886 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4887 SelectLoadLane(Node, 2, AArch64::LD2i8);
4888 return;
4889 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4890 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4891 SelectLoadLane(Node, 2, AArch64::LD2i16);
4892 return;
4893 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4894 VT == MVT::v2f32) {
4895 SelectLoadLane(Node, 2, AArch64::LD2i32);
4896 return;
4897 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4898 VT == MVT::v1f64) {
4899 SelectLoadLane(Node, 2, AArch64::LD2i64);
4900 return;
4901 }
4902 break;
4903 case Intrinsic::aarch64_neon_ld3lane:
4904 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4905 SelectLoadLane(Node, 3, AArch64::LD3i8);
4906 return;
4907 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4908 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4909 SelectLoadLane(Node, 3, AArch64::LD3i16);
4910 return;
4911 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4912 VT == MVT::v2f32) {
4913 SelectLoadLane(Node, 3, AArch64::LD3i32);
4914 return;
4915 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4916 VT == MVT::v1f64) {
4917 SelectLoadLane(Node, 3, AArch64::LD3i64);
4918 return;
4919 }
4920 break;
4921 case Intrinsic::aarch64_neon_ld4lane:
4922 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4923 SelectLoadLane(Node, 4, AArch64::LD4i8);
4924 return;
4925 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4926 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4927 SelectLoadLane(Node, 4, AArch64::LD4i16);
4928 return;
4929 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4930 VT == MVT::v2f32) {
4931 SelectLoadLane(Node, 4, AArch64::LD4i32);
4932 return;
4933 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4934 VT == MVT::v1f64) {
4935 SelectLoadLane(Node, 4, AArch64::LD4i64);
4936 return;
4937 }
4938 break;
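// FEAT_LS64: LD64B loads 64 bytes into eight consecutive X registers,
// extracted through the x8sub_0 subregister index.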
4939 case Intrinsic::aarch64_ld64b:
4940 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4941 return;
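// SVE structured loads with the _sret interface return the loaded vectors as
// a tuple; SelectPredicatedLoad chooses between the reg+imm and reg+reg
// addressing forms of the LDn instruction.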
4942 case Intrinsic::aarch64_sve_ld2q_sret: {
4943 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
4944 return;
4945 }
4946 case Intrinsic::aarch64_sve_ld3q_sret: {
4947 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
4948 return;
4949 }
4950 case Intrinsic::aarch64_sve_ld4q_sret: {
4951 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
4952 return;
4953 }
4954 case Intrinsic::aarch64_sve_ld2_sret: {
4955 if (VT == MVT::nxv16i8) {
4956 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4957 true);
4958 return;
4959 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4960 VT == MVT::nxv8bf16) {
4961 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4962 true);
4963 return;
4964 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4965 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4966 true);
4967 return;
4968 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4969 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4970 true);
4971 return;
4972 }
4973 break;
4974 }
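// Predicate-as-counter multi-vector loads: SME2 targets select the pseudo
// opcodes, SVE2p1 targets select the real LD1x{2,4} instructions, and other
// targets fall through to default handling.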
4975 case Intrinsic::aarch64_sve_ld1_pn_x2: {
4976 if (VT == MVT::nxv16i8) {
4977 if (Subtarget->hasSME2())
4978 SelectContiguousMultiVectorLoad(
4979 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
4980 else if (Subtarget->hasSVE2p1())
4981 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
4982 AArch64::LD1B_2Z);
4983 else
4984 break;
4985 return;
4986 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4987 VT == MVT::nxv8bf16) {
4988 if (Subtarget->hasSME2())
4989 SelectContiguousMultiVectorLoad(
4990 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
4991 else if (Subtarget->hasSVE2p1())
4992 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
4993 AArch64::LD1H_2Z);
4994 else
4995 break;
4996 return;
4997 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4998 if (Subtarget->hasSME2())
4999 SelectContiguousMultiVectorLoad(
5000 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5001 else if (Subtarget->hasSVE2p1())
5002 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5003 AArch64::LD1W_2Z);
5004 else
5005 break;
5006 return;
5007 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5008 if (Subtarget->hasSME2())
5009 SelectContiguousMultiVectorLoad(
5010 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5011 else if (Subtarget->hasSVE2p1())
5012 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5013 AArch64::LD1D_2Z);
5014 else
5015 break;
5016 return;
5017 }
5018 break;
5019 }
5020 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5021 if (VT == MVT::nxv16i8) {
5022 if (Subtarget->hasSME2())
5023 SelectContiguousMultiVectorLoad(
5024 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5025 else if (Subtarget->hasSVE2p1())
5026 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5027 AArch64::LD1B_4Z);
5028 else
5029 break;
5030 return;
5031 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5032 VT == MVT::nxv8bf16) {
5033 if (Subtarget->hasSME2())
5034 SelectContiguousMultiVectorLoad(
5035 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5036 else if (Subtarget->hasSVE2p1())
5037 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5038 AArch64::LD1H_4Z);
5039 else
5040 break;
5041 return;
5042 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5043 if (Subtarget->hasSME2())
5044 SelectContiguousMultiVectorLoad(
5045 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5046 else if (Subtarget->hasSVE2p1())
5047 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5048 AArch64::LD1W_4Z);
5049 else
5050 break;
5051 return;
5052 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5053 if (Subtarget->hasSME2())
5054 SelectContiguousMultiVectorLoad(
5055 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5056 else if (Subtarget->hasSVE2p1())
5057 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5058 AArch64::LD1D_4Z);
5059 else
5060 break;
5061 return;
5062 }
5063 break;
5064 }
5065 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5066 if (VT == MVT::nxv16i8) {
5067 if (Subtarget->hasSME2())
5068 SelectContiguousMultiVectorLoad(Node, 2, 0,
5069 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5070 AArch64::LDNT1B_2Z_PSEUDO);
5071 else if (Subtarget->hasSVE2p1())
5072 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5073 AArch64::LDNT1B_2Z);
5074 else
5075 break;
5076 return;
5077 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5078 VT == MVT::nxv8bf16) {
5079 if (Subtarget->hasSME2())
5080 SelectContiguousMultiVectorLoad(Node, 2, 1,
5081 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5082 AArch64::LDNT1H_2Z_PSEUDO);
5083 else if (Subtarget->hasSVE2p1())
5084 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5085 AArch64::LDNT1H_2Z);
5086 else
5087 break;
5088 return;
5089 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5090 if (Subtarget->hasSME2())
5091 SelectContiguousMultiVectorLoad(Node, 2, 2,
5092 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5093 AArch64::LDNT1W_2Z_PSEUDO);
5094 else if (Subtarget->hasSVE2p1())
5095 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5096 AArch64::LDNT1W_2Z);
5097 else
5098 break;
5099 return;
5100 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5101 if (Subtarget->hasSME2())
5102 SelectContiguousMultiVectorLoad(Node, 2, 3,
5103 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5104 AArch64::LDNT1D_2Z_PSEUDO);
5105 else if (Subtarget->hasSVE2p1())
5106 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5107 AArch64::LDNT1D_2Z);
5108 else
5109 break;
5110 return;
5111 }
5112 break;
5113 }
5114 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5115 if (VT == MVT::nxv16i8) {
5116 if (Subtarget->hasSME2())
5117 SelectContiguousMultiVectorLoad(Node, 4, 0,
5118 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5119 AArch64::LDNT1B_4Z_PSEUDO);
5120 else if (Subtarget->hasSVE2p1())
5121 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5122 AArch64::LDNT1B_4Z);
5123 else
5124 break;
5125 return;
5126 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5127 VT == MVT::nxv8bf16) {
5128 if (Subtarget->hasSME2())
5129 SelectContiguousMultiVectorLoad(Node, 4, 1,
5130 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5131 AArch64::LDNT1H_4Z_PSEUDO);
5132 else if (Subtarget->hasSVE2p1())
5133 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5134 AArch64::LDNT1H_4Z);
5135 else
5136 break;
5137 return;
5138 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5139 if (Subtarget->hasSME2())
5140 SelectContiguousMultiVectorLoad(Node, 4, 2,
5141 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5142 AArch64::LDNT1W_4Z_PSEUDO);
5143 else if (Subtarget->hasSVE2p1())
5144 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5145 AArch64::LDNT1W_4Z);
5146 else
5147 break;
5148 return;
5149 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5150 if (Subtarget->hasSME2())
5151 SelectContiguousMultiVectorLoad(Node, 4, 3,
5152 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5153 AArch64::LDNT1D_4Z_PSEUDO);
5154 else if (Subtarget->hasSVE2p1())
5155 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5156 AArch64::LDNT1D_4Z);
5157 else
5158 break;
5159 return;
5160 }
5161 break;
5162 }
5163 case Intrinsic::aarch64_sve_ld3_sret: {
5164 if (VT == MVT::nxv16i8) {
5165 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5166 true);
5167 return;
5168 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5169 VT == MVT::nxv8bf16) {
5170 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5171 true);
5172 return;
5173 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5174 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5175 true);
5176 return;
5177 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5178 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5179 true);
5180 return;
5181 }
5182 break;
5183 }
5184 case Intrinsic::aarch64_sve_ld4_sret: {
5185 if (VT == MVT::nxv16i8) {
5186 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5187 true);
5188 return;
5189 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5190 VT == MVT::nxv8bf16) {
5191 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5192 true);
5193 return;
5194 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5195 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5196 true);
5197 return;
5198 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5199 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5200 true);
5201 return;
5202 }
5203 break;
5204 }
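// SME ZA reads: MOVA moves two or four horizontal/vertical ZA slices into an
// SVE vector tuple; the SelectMultiVectorMove template arguments bound and
// scale the slice-index immediate for the given element size.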
5205 case Intrinsic::aarch64_sme_read_hor_vg2: {
5206 if (VT == MVT::nxv16i8) {
5207 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5208 AArch64::MOVA_2ZMXI_H_B);
5209 return;
5210 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5211 VT == MVT::nxv8bf16) {
5212 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5213 AArch64::MOVA_2ZMXI_H_H);
5214 return;
5215 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5216 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5217 AArch64::MOVA_2ZMXI_H_S);
5218 return;
5219 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5220 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5221 AArch64::MOVA_2ZMXI_H_D);
5222 return;
5223 }
5224 break;
5225 }
5226 case Intrinsic::aarch64_sme_read_ver_vg2: {
5227 if (VT == MVT::nxv16i8) {
5228 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5229 AArch64::MOVA_2ZMXI_V_B);
5230 return;
5231 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5232 VT == MVT::nxv8bf16) {
5233 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5234 AArch64::MOVA_2ZMXI_V_H);
5235 return;
5236 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5237 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5238 AArch64::MOVA_2ZMXI_V_S);
5239 return;
5240 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5241 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5242 AArch64::MOVA_2ZMXI_V_D);
5243 return;
5244 }
5245 break;
5246 }
5247 case Intrinsic::aarch64_sme_read_hor_vg4: {
5248 if (VT == MVT::nxv16i8) {
5249 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5250 AArch64::MOVA_4ZMXI_H_B);
5251 return;
5252 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5253 VT == MVT::nxv8bf16) {
5254 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5255 AArch64::MOVA_4ZMXI_H_H);
5256 return;
5257 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5258 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5259 AArch64::MOVA_4ZMXI_H_S);
5260 return;
5261 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5262 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5263 AArch64::MOVA_4ZMXI_H_D);
5264 return;
5265 }
5266 break;
5267 }
5268 case Intrinsic::aarch64_sme_read_ver_vg4: {
5269 if (VT == MVT::nxv16i8) {
5270 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5271 AArch64::MOVA_4ZMXI_V_B);
5272 return;
5273 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5274 VT == MVT::nxv8bf16) {
5275 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5276 AArch64::MOVA_4ZMXI_V_H);
5277 return;
5278 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5279 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5280 AArch64::MOVA_4ZMXI_V_S);
5281 return;
5282 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5283 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5284 AArch64::MOVA_4ZMXI_V_D);
5285 return;
5286 }
5287 break;
5288 }
5289 case Intrinsic::aarch64_sme_read_vg1x2: {
5290 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5291 AArch64::MOVA_VG2_2ZMXI);
5292 return;
5293 }
5294 case Intrinsic::aarch64_sme_read_vg1x4: {
5295 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5296 AArch64::MOVA_VG4_4ZMXI);
5297 return;
5298 }
5299 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5300 if (VT == MVT::nxv16i8) {
5301 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5302 return;
5303 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5304 VT == MVT::nxv8bf16) {
5305 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5306 return;
5307 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5308 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5309 return;
5310 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5311 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5312 return;
5313 }
5314 break;
5315 }
5316 case Intrinsic::aarch64_sme_readz_vert_x2: {
5317 if (VT == MVT::nxv16i8) {
5318 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5319 return;
5320 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5321 VT == MVT::nxv8bf16) {
5322 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5323 return;
5324 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5325 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5326 return;
5327 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5328 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5329 return;
5330 }
5331 break;
5332 }
5333 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5334 if (VT == MVT::nxv16i8) {
5335 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5336 return;
5337 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5338 VT == MVT::nxv8bf16) {
5339 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5340 return;
5341 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5342 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5343 return;
5344 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5345 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5346 return;
5347 }
5348 break;
5349 }
5350 case Intrinsic::aarch64_sme_readz_vert_x4: {
5351 if (VT == MVT::nxv16i8) {
5352 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5353 return;
5354 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5355 VT == MVT::nxv8bf16) {
5356 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5357 return;
5358 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5359 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5360 return;
5361 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5362 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5363 return;
5364 }
5365 break;
5366 }
5367 case Intrinsic::aarch64_sme_readz_x2: {
5368 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5369 AArch64::ZA);
5370 return;
5371 }
5372 case Intrinsic::aarch64_sme_readz_x4: {
5373 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5374 AArch64::ZA);
5375 return;
5376 }
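// The Swift async context lives in the slot immediately below the frame
// pointer, so its address is materialized as FP - 8.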
5377 case Intrinsic::swift_async_context_addr: {
5378 SDLoc DL(Node);
5379 SDValue Chain = Node->getOperand(0);
5380 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5381 SDValue Res = SDValue(
5382 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5383 CurDAG->getTargetConstant(8, DL, MVT::i32),
5384 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5385 0);
5386 ReplaceUses(SDValue(Node, 0), Res);
5387 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5388 CurDAG->RemoveDeadNode(Node);
5389
5390 auto &MF = CurDAG->getMachineFunction();
5391 MF.getFrameInfo().setFrameAddressIsTaken(true);
5392 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5393 return;
5394 }
5395 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5396 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5397 Node->getValueType(0),
5398 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5399 AArch64::LUTI2_4ZTZI_S}))
5400 // Second Immediate must be <= 3:
5401 SelectMultiVectorLuti(Node, 4, Opc, 3);
5402 return;
5403 }
5404 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5405 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5406 Node->getValueType(0),
5407 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5408 // Second Immediate must be <= 1:
5409 SelectMultiVectorLuti(Node, 4, Opc, 1);
5410 return;
5411 }
5412 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5413 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5414 Node->getValueType(0),
5415 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5416 AArch64::LUTI2_2ZTZI_S}))
5417 // Second Immediate must be <= 7:
5418 SelectMultiVectorLuti(Node, 2, Opc, 7);
5419 return;
5420 }
5421 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5422 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5423 Node->getValueType(0),
5424 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5425 AArch64::LUTI4_2ZTZI_S}))
5426 // Second Immediate must be <= 3:
5427 SelectMultiVectorLuti(Node, 2, Opc, 3);
5428 return;
5429 }
5430 }
5431 } break;
5432 case ISD::INTRINSIC_WO_CHAIN: {
5433 unsigned IntNo = Node->getConstantOperandVal(0);
5434 switch (IntNo) {
5435 default:
5436 break;
5437 case Intrinsic::aarch64_tagp:
5438 SelectTagP(Node);
5439 return;
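// NEON table lookups: the result type picks the 64-bit or 128-bit TBL/TBX
// variant, and the trailing boolean marks the TBX forms, which take the
// existing destination as an extra operand (out-of-range indices leave those
// elements unchanged).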
5440 case Intrinsic::aarch64_neon_tbl2:
5441 SelectTable(Node, 2,
5442 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5443 false);
5444 return;
5445 case Intrinsic::aarch64_neon_tbl3:
5446 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5447 : AArch64::TBLv16i8Three,
5448 false);
5449 return;
5450 case Intrinsic::aarch64_neon_tbl4:
5451 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5452 : AArch64::TBLv16i8Four,
5453 false);
5454 return;
5455 case Intrinsic::aarch64_neon_tbx2:
5456 SelectTable(Node, 2,
5457 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5458 true);
5459 return;
5460 case Intrinsic::aarch64_neon_tbx3:
5461 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5462 : AArch64::TBXv16i8Three,
5463 true);
5464 return;
5465 case Intrinsic::aarch64_neon_tbx4:
5466 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5467 : AArch64::TBXv16i8Four,
5468 true);
5469 return;
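// The SVE2p1/SME2 multi-vector intrinsics below follow a common pattern:
// SelectOpcodeFromVT maps the result element type onto the {B, H, S, D}
// opcode list (a 0 entry marks an unsupported type), and the boolean passed
// to SelectDestructiveMultiIntrinsic says whether the second source operand
// is itself a register tuple (x2/x4 forms) or a single vector (_single_
// forms).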
5470 case Intrinsic::aarch64_sve_srshl_single_x2:
5471 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5472 Node->getValueType(0),
5473 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5474 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5475 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5476 return;
5477 case Intrinsic::aarch64_sve_srshl_single_x4:
5478 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5479 Node->getValueType(0),
5480 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5481 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5482 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5483 return;
5484 case Intrinsic::aarch64_sve_urshl_single_x2:
5485 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5486 Node->getValueType(0),
5487 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5488 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5489 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5490 return;
5491 case Intrinsic::aarch64_sve_urshl_single_x4:
5492 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5493 Node->getValueType(0),
5494 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5495 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5496 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5497 return;
5498 case Intrinsic::aarch64_sve_srshl_x2:
5499 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5500 Node->getValueType(0),
5501 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5502 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5503 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5504 return;
5505 case Intrinsic::aarch64_sve_srshl_x4:
5506 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5507 Node->getValueType(0),
5508 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5509 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5510 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5511 return;
5512 case Intrinsic::aarch64_sve_urshl_x2:
5513 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5514 Node->getValueType(0),
5515 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5516 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5517 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5518 return;
5519 case Intrinsic::aarch64_sve_urshl_x4:
5520 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5521 Node->getValueType(0),
5522 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5523 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5524 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5525 return;
5526 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5527 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5528 Node->getValueType(0),
5529 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5530 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5531 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5532 return;
5533 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5534 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5535 Node->getValueType(0),
5536 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5537 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5538 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5539 return;
5540 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5541 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5542 Node->getValueType(0),
5543 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5544 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5545 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5546 return;
5547 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5548 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5549 Node->getValueType(0),
5550 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5551 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5552 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5553 return;
5554 case Intrinsic::aarch64_sve_whilege_x2:
5555 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5556 Node->getValueType(0),
5557 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5558 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5559 SelectWhilePair(Node, Op);
5560 return;
5561 case Intrinsic::aarch64_sve_whilegt_x2:
5562 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5563 Node->getValueType(0),
5564 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5565 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5566 SelectWhilePair(Node, Op);
5567 return;
5568 case Intrinsic::aarch64_sve_whilehi_x2:
5569 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5570 Node->getValueType(0),
5571 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5572 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5573 SelectWhilePair(Node, Op);
5574 return;
5575 case Intrinsic::aarch64_sve_whilehs_x2:
5576 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5577 Node->getValueType(0),
5578 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5579 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5580 SelectWhilePair(Node, Op);
5581 return;
5582 case Intrinsic::aarch64_sve_whilele_x2:
5583 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5584 Node->getValueType(0),
5585 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5586 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5587 SelectWhilePair(Node, Op);
5588 return;
5589 case Intrinsic::aarch64_sve_whilelo_x2:
5590 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5591 Node->getValueType(0),
5592 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5593 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5594 SelectWhilePair(Node, Op);
5595 return;
5596 case Intrinsic::aarch64_sve_whilels_x2:
5597 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5598 Node->getValueType(0),
5599 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5600 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5601 SelectWhilePair(Node, Op);
5602 return;
5603 case Intrinsic::aarch64_sve_whilelt_x2:
5604 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5605 Node->getValueType(0),
5606 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5607 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5608 SelectWhilePair(Node, Op);
5609 return;
5610 case Intrinsic::aarch64_sve_smax_single_x2:
5611 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5612 Node->getValueType(0),
5613 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5614 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5615 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5616 return;
5617 case Intrinsic::aarch64_sve_umax_single_x2:
5618 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5619 Node->getValueType(0),
5620 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5621 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5622 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5623 return;
5624 case Intrinsic::aarch64_sve_fmax_single_x2:
5625 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5626 Node->getValueType(0),
5627 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5628 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5629 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5630 return;
5631 case Intrinsic::aarch64_sve_smax_single_x4:
5632 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5633 Node->getValueType(0),
5634 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5635 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5636 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5637 return;
5638 case Intrinsic::aarch64_sve_umax_single_x4:
5639 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5640 Node->getValueType(0),
5641 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5642 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5643 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5644 return;
5645 case Intrinsic::aarch64_sve_fmax_single_x4:
5646 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5647 Node->getValueType(0),
5648 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5649 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5650 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5651 return;
5652 case Intrinsic::aarch64_sve_smin_single_x2:
5653 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5654 Node->getValueType(0),
5655 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5656 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5657 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5658 return;
5659 case Intrinsic::aarch64_sve_umin_single_x2:
5660 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5661 Node->getValueType(0),
5662 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5663 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5664 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5665 return;
5666 case Intrinsic::aarch64_sve_fmin_single_x2:
5667 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5668 Node->getValueType(0),
5669 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5670 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5671 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5672 return;
5673 case Intrinsic::aarch64_sve_smin_single_x4:
5674 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5675 Node->getValueType(0),
5676 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5677 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5678 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5679 return;
5680 case Intrinsic::aarch64_sve_umin_single_x4:
5681 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5682 Node->getValueType(0),
5683 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5684 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5685 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5686 return;
5687 case Intrinsic::aarch64_sve_fmin_single_x4:
5688 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5689 Node->getValueType(0),
5690 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5691 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5692 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5693 return;
5694 case Intrinsic::aarch64_sve_smax_x2:
5695 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5696 Node->getValueType(0),
5697 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5698 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5699 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5700 return;
5701 case Intrinsic::aarch64_sve_umax_x2:
5702 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5703 Node->getValueType(0),
5704 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5705 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5706 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5707 return;
5708 case Intrinsic::aarch64_sve_fmax_x2:
5709 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5710 Node->getValueType(0),
5711 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5712 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5713 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5714 return;
5715 case Intrinsic::aarch64_sve_smax_x4:
5716 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5717 Node->getValueType(0),
5718 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5719 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5720 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5721 return;
5722 case Intrinsic::aarch64_sve_umax_x4:
5723 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5724 Node->getValueType(0),
5725 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5726 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5727 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5728 return;
5729 case Intrinsic::aarch64_sve_fmax_x4:
5730 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5731 Node->getValueType(0),
5732 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5733 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5734 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5735 return;
5736 case Intrinsic::aarch64_sve_smin_x2:
5737 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5738 Node->getValueType(0),
5739 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5740 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5741 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5742 return;
5743 case Intrinsic::aarch64_sve_umin_x2:
5744 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5745 Node->getValueType(0),
5746 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5747 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5748 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5749 return;
5750 case Intrinsic::aarch64_sve_fmin_x2:
5751 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5752 Node->getValueType(0),
5753 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5754 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5755 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5756 return;
5757 case Intrinsic::aarch64_sve_smin_x4:
5758 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5759 Node->getValueType(0),
5760 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5761 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5762 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5763 return;
5764 case Intrinsic::aarch64_sve_umin_x4:
5765 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5766 Node->getValueType(0),
5767 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5768 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5769 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5770 return;
5771 case Intrinsic::aarch64_sve_fmin_x4:
5772 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5773 Node->getValueType(0),
5774 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5775 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5776 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5777 return;
5778 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
5779 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5780 Node->getValueType(0),
5781 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5782 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5783 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5784 return;
5785 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
5786 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5787 Node->getValueType(0),
5788 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5789 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5790 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5791 return;
5792 case Intrinsic::aarch64_sve_fminnm_single_x2:
5793 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5794 Node->getValueType(0),
5795 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5796 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5797 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5798 return;
5799 case Intrinsic::aarch64_sve_fminnm_single_x4:
5800 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5801 Node->getValueType(0),
5802 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5803 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5804 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5805 return;
5806 case Intrinsic::aarch64_sve_fmaxnm_x2:
5807 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5808 Node->getValueType(0),
5809 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5810 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5811 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5812 return;
5813 case Intrinsic::aarch64_sve_fmaxnm_x4:
5814 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5815 Node->getValueType(0),
5816 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5817 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5818 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5819 return;
5820 case Intrinsic::aarch64_sve_fminnm_x2:
5821 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5822 Node->getValueType(0),
5823 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5824 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5825 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5826 return;
5827 case Intrinsic::aarch64_sve_fminnm_x4:
5828 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5829 Node->getValueType(0),
5830 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5831 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5832 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5833 return;
5834 case Intrinsic::aarch64_sve_fcvtzs_x2:
5835 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5836 return;
5837 case Intrinsic::aarch64_sve_scvtf_x2:
5838 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5839 return;
5840 case Intrinsic::aarch64_sve_fcvtzu_x2:
5841 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5842 return;
5843 case Intrinsic::aarch64_sve_ucvtf_x2:
5844 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5845 return;
5846 case Intrinsic::aarch64_sve_fcvtzs_x4:
5847 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5848 return;
5849 case Intrinsic::aarch64_sve_scvtf_x4:
5850 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5851 return;
5852 case Intrinsic::aarch64_sve_fcvtzu_x4:
5853 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5854 return;
5855 case Intrinsic::aarch64_sve_ucvtf_x4:
5856 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5857 return;
5858 case Intrinsic::aarch64_sve_fcvt_widen_x2:
5859 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
5860 return;
5861 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5862 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
5863 return;
5864 case Intrinsic::aarch64_sve_sclamp_single_x2:
5865 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5866 Node->getValueType(0),
5867 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5868 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5869 SelectClamp(Node, 2, Op);
5870 return;
5871 case Intrinsic::aarch64_sve_uclamp_single_x2:
5872 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5873 Node->getValueType(0),
5874 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5875 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5876 SelectClamp(Node, 2, Op);
5877 return;
5878 case Intrinsic::aarch64_sve_fclamp_single_x2:
5879 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5880 Node->getValueType(0),
5881 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5882 AArch64::FCLAMP_VG2_2Z2Z_D}))
5883 SelectClamp(Node, 2, Op);
5884 return;
5885 case Intrinsic::aarch64_sve_bfclamp_single_x2:
5886 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
5887 return;
5888 case Intrinsic::aarch64_sve_sclamp_single_x4:
5889 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5890 Node->getValueType(0),
5891 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5892 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5893 SelectClamp(Node, 4, Op);
5894 return;
5895 case Intrinsic::aarch64_sve_uclamp_single_x4:
5896 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5897 Node->getValueType(0),
5898 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5899 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5900 SelectClamp(Node, 4, Op);
5901 return;
5902 case Intrinsic::aarch64_sve_fclamp_single_x4:
5903 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5904 Node->getValueType(0),
5905 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5906 AArch64::FCLAMP_VG4_4Z4Z_D}))
5907 SelectClamp(Node, 4, Op);
5908 return;
5909 case Intrinsic::aarch64_sve_bfclamp_single_x4:
5910 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
5911 return;
5912 case Intrinsic::aarch64_sve_add_single_x2:
5913 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5914 Node->getValueType(0),
5915 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
5916 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
5917 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5918 return;
5919 case Intrinsic::aarch64_sve_add_single_x4:
5920 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5921 Node->getValueType(0),
5922 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
5923 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
5924 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5925 return;
5926 case Intrinsic::aarch64_sve_zip_x2:
5927 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5928 Node->getValueType(0),
5929 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
5930 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
5931 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5932 return;
5933 case Intrinsic::aarch64_sve_zipq_x2:
5934 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5935 AArch64::ZIP_VG2_2ZZZ_Q);
5936 return;
5937 case Intrinsic::aarch64_sve_zip_x4:
5938 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5939 Node->getValueType(0),
5940 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
5941 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
5942 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5943 return;
5944 case Intrinsic::aarch64_sve_zipq_x4:
5945 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5946 AArch64::ZIP_VG4_4Z4Z_Q);
5947 return;
5948 case Intrinsic::aarch64_sve_uzp_x2:
5949 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5950 Node->getValueType(0),
5951 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
5952 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
5953 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5954 return;
5955 case Intrinsic::aarch64_sve_uzpq_x2:
5956 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5957 AArch64::UZP_VG2_2ZZZ_Q);
5958 return;
5959 case Intrinsic::aarch64_sve_uzp_x4:
5960 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5961 Node->getValueType(0),
5962 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
5963 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
5964 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5965 return;
5966 case Intrinsic::aarch64_sve_uzpq_x4:
5967 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5968 AArch64::UZP_VG4_4Z4Z_Q);
5969 return;
5970 case Intrinsic::aarch64_sve_sel_x2:
5971 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5972 Node->getValueType(0),
5973 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
5974 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
5975 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
5976 return;
5977 case Intrinsic::aarch64_sve_sel_x4:
5978 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5979 Node->getValueType(0),
5980 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
5981 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
5982 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
5983 return;
5984 case Intrinsic::aarch64_sve_frinta_x2:
5985 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5986 return;
5987 case Intrinsic::aarch64_sve_frinta_x4:
5988 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5989 return;
5990 case Intrinsic::aarch64_sve_frintm_x2:
5991 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5992 return;
5993 case Intrinsic::aarch64_sve_frintm_x4:
5994 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5995 return;
5996 case Intrinsic::aarch64_sve_frintn_x2:
5997 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5998 return;
5999 case Intrinsic::aarch64_sve_frintn_x4:
6000 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6001 return;
6002 case Intrinsic::aarch64_sve_frintp_x2:
6003 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6004 return;
6005 case Intrinsic::aarch64_sve_frintp_x4:
6006 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6007 return;
6008 case Intrinsic::aarch64_sve_sunpk_x2:
6009 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6010 Node->getValueType(0),
6011 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6012 AArch64::SUNPK_VG2_2ZZ_D}))
6013 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6014 return;
6015 case Intrinsic::aarch64_sve_uunpk_x2:
6016 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6017 Node->getValueType(0),
6018 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6019 AArch64::UUNPK_VG2_2ZZ_D}))
6020 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6021 return;
6022 case Intrinsic::aarch64_sve_sunpk_x4:
6023 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6024 Node->getValueType(0),
6025 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6026 AArch64::SUNPK_VG4_4Z2Z_D}))
6027 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6028 return;
6029 case Intrinsic::aarch64_sve_uunpk_x4:
6030 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6031 Node->getValueType(0),
6032 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6033 AArch64::UUNPK_VG4_4Z2Z_D}))
6034 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6035 return;
6036 case Intrinsic::aarch64_sve_pext_x2: {
6037 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6038 Node->getValueType(0),
6039 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6040 AArch64::PEXT_2PCI_D}))
6041 SelectPExtPair(Node, Op);
6042 return;
6043 }
6044 }
6045 break;
6046 }
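// Intrinsics with side effects but no results (the NEON/SVE structured
// stores) are modelled as INTRINSIC_VOID; the value type is taken from the
// first data operand.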
6047 case ISD::INTRINSIC_VOID: {
6048 unsigned IntNo = Node->getConstantOperandVal(1);
6049 if (Node->getNumOperands() >= 3)
6050 VT = Node->getOperand(2)->getValueType(0);
6051 switch (IntNo) {
6052 default:
6053 break;
6054 case Intrinsic::aarch64_neon_st1x2: {
6055 if (VT == MVT::v8i8) {
6056 SelectStore(Node, 2, AArch64::ST1Twov8b);
6057 return;
6058 } else if (VT == MVT::v16i8) {
6059 SelectStore(Node, 2, AArch64::ST1Twov16b);
6060 return;
6061 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6062 VT == MVT::v4bf16) {
6063 SelectStore(Node, 2, AArch64::ST1Twov4h);
6064 return;
6065 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6066 VT == MVT::v8bf16) {
6067 SelectStore(Node, 2, AArch64::ST1Twov8h);
6068 return;
6069 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6070 SelectStore(Node, 2, AArch64::ST1Twov2s);
6071 return;
6072 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6073 SelectStore(Node, 2, AArch64::ST1Twov4s);
6074 return;
6075 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6076 SelectStore(Node, 2, AArch64::ST1Twov2d);
6077 return;
6078 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6079 SelectStore(Node, 2, AArch64::ST1Twov1d);
6080 return;
6081 }
6082 break;
6083 }
6084 case Intrinsic::aarch64_neon_st1x3: {
6085 if (VT == MVT::v8i8) {
6086 SelectStore(Node, 3, AArch64::ST1Threev8b);
6087 return;
6088 } else if (VT == MVT::v16i8) {
6089 SelectStore(Node, 3, AArch64::ST1Threev16b);
6090 return;
6091 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6092 VT == MVT::v4bf16) {
6093 SelectStore(Node, 3, AArch64::ST1Threev4h);
6094 return;
6095 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6096 VT == MVT::v8bf16) {
6097 SelectStore(Node, 3, AArch64::ST1Threev8h);
6098 return;
6099 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6100 SelectStore(Node, 3, AArch64::ST1Threev2s);
6101 return;
6102 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6103 SelectStore(Node, 3, AArch64::ST1Threev4s);
6104 return;
6105 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6106 SelectStore(Node, 3, AArch64::ST1Threev2d);
6107 return;
6108 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6109 SelectStore(Node, 3, AArch64::ST1Threev1d);
6110 return;
6111 }
6112 break;
6113 }
6114 case Intrinsic::aarch64_neon_st1x4: {
6115 if (VT == MVT::v8i8) {
6116 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6117 return;
6118 } else if (VT == MVT::v16i8) {
6119 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6120 return;
6121 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6122 VT == MVT::v4bf16) {
6123 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6124 return;
6125 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6126 VT == MVT::v8bf16) {
6127 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6128 return;
6129 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6130 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6131 return;
6132 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6133 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6134 return;
6135 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6136 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6137 return;
6138 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6139 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6140 return;
6141 }
6142 break;
6143 }
6144 case Intrinsic::aarch64_neon_st2: {
6145 if (VT == MVT::v8i8) {
6146 SelectStore(Node, 2, AArch64::ST2Twov8b);
6147 return;
6148 } else if (VT == MVT::v16i8) {
6149 SelectStore(Node, 2, AArch64::ST2Twov16b);
6150 return;
6151 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6152 VT == MVT::v4bf16) {
6153 SelectStore(Node, 2, AArch64::ST2Twov4h);
6154 return;
6155 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6156 VT == MVT::v8bf16) {
6157 SelectStore(Node, 2, AArch64::ST2Twov8h);
6158 return;
6159 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6160 SelectStore(Node, 2, AArch64::ST2Twov2s);
6161 return;
6162 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6163 SelectStore(Node, 2, AArch64::ST2Twov4s);
6164 return;
6165 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6166 SelectStore(Node, 2, AArch64::ST2Twov2d);
6167 return;
6168 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6169 SelectStore(Node, 2, AArch64::ST1Twov1d);
6170 return;
6171 }
6172 break;
6173 }
6174 case Intrinsic::aarch64_neon_st3: {
6175 if (VT == MVT::v8i8) {
6176 SelectStore(Node, 3, AArch64::ST3Threev8b);
6177 return;
6178 } else if (VT == MVT::v16i8) {
6179 SelectStore(Node, 3, AArch64::ST3Threev16b);
6180 return;
6181 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6182 VT == MVT::v4bf16) {
6183 SelectStore(Node, 3, AArch64::ST3Threev4h);
6184 return;
6185 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6186 VT == MVT::v8bf16) {
6187 SelectStore(Node, 3, AArch64::ST3Threev8h);
6188 return;
6189 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6190 SelectStore(Node, 3, AArch64::ST3Threev2s);
6191 return;
6192 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6193 SelectStore(Node, 3, AArch64::ST3Threev4s);
6194 return;
6195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6196 SelectStore(Node, 3, AArch64::ST3Threev2d);
6197 return;
6198 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6199 SelectStore(Node, 3, AArch64::ST1Threev1d);
6200 return;
6201 }
6202 break;
6203 }
6204 case Intrinsic::aarch64_neon_st4: {
6205 if (VT == MVT::v8i8) {
6206 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6207 return;
6208 } else if (VT == MVT::v16i8) {
6209 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6210 return;
6211 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6212 VT == MVT::v4bf16) {
6213 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6214 return;
6215 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6216 VT == MVT::v8bf16) {
6217 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6218 return;
6219 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6220 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6221 return;
6222 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6223 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6224 return;
6225 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6226 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6227 return;
6228 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6229 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6230 return;
6231 }
6232 break;
6233 }
6234 case Intrinsic::aarch64_neon_st2lane: {
6235 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6236 SelectStoreLane(Node, 2, AArch64::ST2i8);
6237 return;
6238 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6239 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6240 SelectStoreLane(Node, 2, AArch64::ST2i16);
6241 return;
6242 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6243 VT == MVT::v2f32) {
6244 SelectStoreLane(Node, 2, AArch64::ST2i32);
6245 return;
6246 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6247 VT == MVT::v1f64) {
6248 SelectStoreLane(Node, 2, AArch64::ST2i64);
6249 return;
6250 }
6251 break;
6252 }
6253 case Intrinsic::aarch64_neon_st3lane: {
6254 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6255 SelectStoreLane(Node, 3, AArch64::ST3i8);
6256 return;
6257 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6258 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6259 SelectStoreLane(Node, 3, AArch64::ST3i16);
6260 return;
6261 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6262 VT == MVT::v2f32) {
6263 SelectStoreLane(Node, 3, AArch64::ST3i32);
6264 return;
6265 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6266 VT == MVT::v1f64) {
6267 SelectStoreLane(Node, 3, AArch64::ST3i64);
6268 return;
6269 }
6270 break;
6271 }
6272 case Intrinsic::aarch64_neon_st4lane: {
6273 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6274 SelectStoreLane(Node, 4, AArch64::ST4i8);
6275 return;
6276 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6277 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6278 SelectStoreLane(Node, 4, AArch64::ST4i16);
6279 return;
6280 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6281 VT == MVT::v2f32) {
6282 SelectStoreLane(Node, 4, AArch64::ST4i32);
6283 return;
6284 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6285 VT == MVT::v1f64) {
6286 SelectStoreLane(Node, 4, AArch64::ST4i64);
6287 return;
6288 }
6289 break;
6290 }
6291 case Intrinsic::aarch64_sve_st2q: {
6292 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6293 return;
6294 }
6295 case Intrinsic::aarch64_sve_st3q: {
6296 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6297 return;
6298 }
6299 case Intrinsic::aarch64_sve_st4q: {
6300 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6301 return;
6302 }
6303 case Intrinsic::aarch64_sve_st2: {
6304 if (VT == MVT::nxv16i8) {
6305 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6306 return;
6307 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6308 VT == MVT::nxv8bf16) {
6309 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6310 return;
6311 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6312 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6313 return;
6314 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6315 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6316 return;
6317 }
6318 break;
6319 }
6320 case Intrinsic::aarch64_sve_st3: {
6321 if (VT == MVT::nxv16i8) {
6322 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6323 return;
6324 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6325 VT == MVT::nxv8bf16) {
6326 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6327 return;
6328 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6329 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6330 return;
6331 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6332 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6333 return;
6334 }
6335 break;
6336 }
6337 case Intrinsic::aarch64_sve_st4: {
6338 if (VT == MVT::nxv16i8) {
6339 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6340 return;
6341 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6342 VT == MVT::nxv8bf16) {
6343 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6344 return;
6345 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6346 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6347 return;
6348 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6349 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6350 return;
6351 }
6352 break;
6353 }
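// Note (inferred from the visible call arguments): in SelectPredicatedStore
// the second argument is the number of vectors in the structure and the
// third is the log2 of the element size in bytes (0 = B, 1 = H, 2 = W,
// 3 = D, 4 = Q); e.g. an nxv4i32 st4 maps to ST4W/ST4W_IMM with 4 and 2.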
6354 }
6355 break;
6356 }
6357 case AArch64ISD::LD2post: {
6358 if (VT == MVT::v8i8) {
6359 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6360 return;
6361 } else if (VT == MVT::v16i8) {
6362 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6363 return;
6364 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6365 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6366 return;
6367 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6368 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6369 return;
6370 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6371 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6372 return;
6373 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6374 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6375 return;
6376 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6377 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6378 return;
6379 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6380 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6381 return;
6382 }
6383 break;
6384 }
6385 case AArch64ISD::LD3post: {
6386 if (VT == MVT::v8i8) {
6387 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6388 return;
6389 } else if (VT == MVT::v16i8) {
6390 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6391 return;
6392 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6393 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6394 return;
6395 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6396 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6397 return;
6398 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6399 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6400 return;
6401 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6402 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6403 return;
6404 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6405 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6406 return;
6407 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6408 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6409 return;
6410 }
6411 break;
6412 }
6413 case AArch64ISD::LD4post: {
6414 if (VT == MVT::v8i8) {
6415 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6416 return;
6417 } else if (VT == MVT::v16i8) {
6418 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6419 return;
6420 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6421 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6422 return;
6423 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6424 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6425 return;
6426 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6427 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6428 return;
6429 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6430 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6431 return;
6432 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6433 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6434 return;
6435 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6436 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6437 return;
6438 }
6439 break;
6440 }
6441 case AArch64ISD::LD1x2post: {
6442 if (VT == MVT::v8i8) {
6443 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6444 return;
6445 } else if (VT == MVT::v16i8) {
6446 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6447 return;
6448 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6449 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6450 return;
6451 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6452 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6453 return;
6454 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6455 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6456 return;
6457 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6458 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6459 return;
6460 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6461 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6462 return;
6463 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6464 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6465 return;
6466 }
6467 break;
6468 }
6469 case AArch64ISD::LD1x3post: {
6470 if (VT == MVT::v8i8) {
6471 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6472 return;
6473 } else if (VT == MVT::v16i8) {
6474 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6475 return;
6476 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6477 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6478 return;
6479 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6480 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6481 return;
6482 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6483 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6484 return;
6485 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6486 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6487 return;
6488 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6489 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6490 return;
6491 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6492 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6493 return;
6494 }
6495 break;
6496 }
6497 case AArch64ISD::LD1x4post: {
6498 if (VT == MVT::v8i8) {
6499 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6500 return;
6501 } else if (VT == MVT::v16i8) {
6502 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6503 return;
6504 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6505 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6506 return;
6507 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6508 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6509 return;
6510 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6511 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6512 return;
6513 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6514 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6515 return;
6516 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6517 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6518 return;
6519 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6520 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6521 return;
6522 }
6523 break;
6524 }
6525 case AArch64ISD::LD1DUPpost: {
6526 if (VT == MVT::v8i8) {
6527 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6528 return;
6529 } else if (VT == MVT::v16i8) {
6530 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6531 return;
6532 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6533 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6534 return;
6535 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6536 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6537 return;
6538 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6539 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6540 return;
6541 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6542 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6543 return;
6544 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6545 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6546 return;
6547 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6548 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6549 return;
6550 }
6551 break;
6552 }
6553 case AArch64ISD::LD2DUPpost: {
6554 if (VT == MVT::v8i8) {
6555 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6556 return;
6557 } else if (VT == MVT::v16i8) {
6558 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6559 return;
6560 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6561 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6562 return;
6563 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6564 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6565 return;
6566 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6567 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6568 return;
6569 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6570 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6571 return;
6572 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6573 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6574 return;
6575 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6576 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6577 return;
6578 }
6579 break;
6580 }
6581 case AArch64ISD::LD3DUPpost: {
6582 if (VT == MVT::v8i8) {
6583 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6584 return;
6585 } else if (VT == MVT::v16i8) {
6586 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6587 return;
6588 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6589 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6590 return;
6591 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6592 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6593 return;
6594 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6595 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6596 return;
6597 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6598 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6599 return;
6600 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6601 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6602 return;
6603 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6604 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6605 return;
6606 }
6607 break;
6608 }
6609 case AArch64ISD::LD4DUPpost: {
6610 if (VT == MVT::v8i8) {
6611 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6612 return;
6613 } else if (VT == MVT::v16i8) {
6614 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6615 return;
6616 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6617 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6618 return;
6619 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6620 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6621 return;
6622 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6623 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6624 return;
6625 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6626 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6627 return;
6628 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6629 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6630 return;
6631 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6632 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6633 return;
6634 }
6635 break;
6636 }
6637 case AArch64ISD::LD1LANEpost: {
6638 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6639 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6640 return;
6641 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6642 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6643 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6644 return;
6645 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6646 VT == MVT::v2f32) {
6647 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6648 return;
6649 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6650 VT == MVT::v1f64) {
6651 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6652 return;
6653 }
6654 break;
6655 }
6656 case AArch64ISD::LD2LANEpost: {
6657 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6658 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6659 return;
6660 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6661 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6662 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6663 return;
6664 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6665 VT == MVT::v2f32) {
6666 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6667 return;
6668 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6669 VT == MVT::v1f64) {
6670 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6671 return;
6672 }
6673 break;
6674 }
6675 case AArch64ISD::LD3LANEpost: {
6676 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6677 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6678 return;
6679 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6680 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6681 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6682 return;
6683 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6684 VT == MVT::v2f32) {
6685 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6686 return;
6687 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6688 VT == MVT::v1f64) {
6689 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6690 return;
6691 }
6692 break;
6693 }
6694 case AArch64ISD::LD4LANEpost: {
6695 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6696 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6697 return;
6698 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6699 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6700 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6701 return;
6702 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6703 VT == MVT::v2f32) {
6704 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6705 return;
6706 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6707 VT == MVT::v1f64) {
6708 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6709 return;
6710 }
6711 break;
6712 }
6713 case AArch64ISD::ST2post: {
6714 VT = Node->getOperand(1).getValueType();
6715 if (VT == MVT::v8i8) {
6716 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6717 return;
6718 } else if (VT == MVT::v16i8) {
6719 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6720 return;
6721 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6722 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6723 return;
6724 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6725 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6726 return;
6727 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6728 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6729 return;
6730 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6731 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6732 return;
6733 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6734 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6735 return;
6736 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6737 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6738 return;
6739 }
6740 break;
6741 }
6742 case AArch64ISD::ST3post: {
6743 VT = Node->getOperand(1).getValueType();
6744 if (VT == MVT::v8i8) {
6745 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6746 return;
6747 } else if (VT == MVT::v16i8) {
6748 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6749 return;
6750 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6751 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6752 return;
6753 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6754 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6755 return;
6756 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6757 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6758 return;
6759 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6760 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6761 return;
6762 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6763 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6764 return;
6765 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6766 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6767 return;
6768 }
6769 break;
6770 }
6771 case AArch64ISD::ST4post: {
6772 VT = Node->getOperand(1).getValueType();
6773 if (VT == MVT::v8i8) {
6774 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6775 return;
6776 } else if (VT == MVT::v16i8) {
6777 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6778 return;
6779 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6780 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6781 return;
6782 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6783 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6784 return;
6785 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6786 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6787 return;
6788 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6789 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6790 return;
6791 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6792 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6793 return;
6794 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6795 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6796 return;
6797 }
6798 break;
6799 }
6800 case AArch64ISD::ST1x2post: {
6801 VT = Node->getOperand(1).getValueType();
6802 if (VT == MVT::v8i8) {
6803 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6804 return;
6805 } else if (VT == MVT::v16i8) {
6806 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6807 return;
6808 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6809 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6810 return;
6811 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6812 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6813 return;
6814 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6815 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6816 return;
6817 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6818 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6819 return;
6820 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6821 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6822 return;
6823 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6824 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6825 return;
6826 }
6827 break;
6828 }
6829 case AArch64ISD::ST1x3post: {
6830 VT = Node->getOperand(1).getValueType();
6831 if (VT == MVT::v8i8) {
6832 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6833 return;
6834 } else if (VT == MVT::v16i8) {
6835 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6836 return;
6837 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6838 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6839 return;
6840 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6841 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6842 return;
6843 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6844 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6845 return;
6846 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6847 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6848 return;
6849 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6850 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6851 return;
6852 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6853 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6854 return;
6855 }
6856 break;
6857 }
6858 case AArch64ISD::ST1x4post: {
6859 VT = Node->getOperand(1).getValueType();
6860 if (VT == MVT::v8i8) {
6861 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6862 return;
6863 } else if (VT == MVT::v16i8) {
6864 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6865 return;
6866 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6867 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6868 return;
6869 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6870 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6871 return;
6872 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6873 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6874 return;
6875 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6876 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6877 return;
6878 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6879 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6880 return;
6881 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6882 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6883 return;
6884 }
6885 break;
6886 }
6887 case AArch64ISD::ST2LANEpost: {
6888 VT = Node->getOperand(1).getValueType();
6889 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6890 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6891 return;
6892 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6893 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6894 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6895 return;
6896 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6897 VT == MVT::v2f32) {
6898 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6899 return;
6900 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6901 VT == MVT::v1f64) {
6902 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6903 return;
6904 }
6905 break;
6906 }
6907 case AArch64ISD::ST3LANEpost: {
6908 VT = Node->getOperand(1).getValueType();
6909 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6910 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6911 return;
6912 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6913 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6914 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
6915 return;
6916 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6917 VT == MVT::v2f32) {
6918 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
6919 return;
6920 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6921 VT == MVT::v1f64) {
6922 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
6923 return;
6924 }
6925 break;
6926 }
6927 case AArch64ISD::ST4LANEpost: {
6928 VT = Node->getOperand(1).getValueType();
6929 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6930 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
6931 return;
6932 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6933 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6934 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
6935 return;
6936 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6937 VT == MVT::v2f32) {
6938 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
6939 return;
6940 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6941 VT == MVT::v1f64) {
6942 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
6943 return;
6944 }
6945 break;
6946 }
6947 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
6948 if (VT == MVT::nxv16i8) {
6949 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
6950 return;
6951 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6952 VT == MVT::nxv8bf16) {
6953 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
6954 return;
6955 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6956 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
6957 return;
6958 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6959 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
6960 return;
6961 }
6962 break;
6963 }
6964 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
6965 if (VT == MVT::nxv16i8) {
6966 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
6967 return;
6968 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6969 VT == MVT::nxv8bf16) {
6970 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
6971 return;
6972 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6973 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
6974 return;
6975 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6976 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
6977 return;
6978 }
6979 break;
6980 }
6981 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
6982 if (VT == MVT::nxv16i8) {
6983 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
6984 return;
6985 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6986 VT == MVT::nxv8bf16) {
6987 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
6988 return;
6989 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6990 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
6991 return;
6992 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6993 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
6994 return;
6995 }
6996 break;
6997 }
6998 }
6999
7000 // Select the default instruction
7001 SelectCode(Node);
7002}
7003
7004/// createAArch64ISelDag - This pass converts a legalized DAG into an
7005/// AArch64-specific DAG, ready for instruction scheduling.
7006FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7007 CodeGenOptLevel OptLevel) {
7008 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7009}
7010
7011/// When \p PredVT is a scalable vector predicate in the form
7012/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7013/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7014/// structured vectors (NumVec >1), the output data type is
7015/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7016/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7017/// EVT.
7018static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7019 unsigned NumVec) {
7020 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7021 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7022 return EVT();
7023
7024 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7025 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7026 return EVT();
7027
7028 ElementCount EC = PredVT.getVectorElementCount();
7029 EVT ScalarVT =
7030 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7031 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7032
7033 return MemVT;
7034}
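// Worked example (illustrative): for PredVT == MVT::nxv4i1 the element type
// is i32 (128 / 4 bits), so the function returns nxv4i32 for NumVec == 1 and
// nxv8i32 for NumVec == 2.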
7035
7036/// Return the EVT of the data associated to a memory operation in \p
7037/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7038static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7039 if (isa<MemSDNode>(Root))
7040 return cast<MemSDNode>(Root)->getMemoryVT();
7041
7042 if (isa<MemIntrinsicSDNode>(Root))
7043 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7044
7045 const unsigned Opcode = Root->getOpcode();
7046 // For custom ISD nodes, we have to look at them individually to extract the
7047 // type of the data moved to/from memory.
7048 switch (Opcode) {
7049 case AArch64ISD::LD1_MERGE_ZERO:
7050 case AArch64ISD::LD1S_MERGE_ZERO:
7051 case AArch64ISD::LDNF1_MERGE_ZERO:
7052 case AArch64ISD::LDNF1S_MERGE_ZERO:
7053 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7054 case AArch64ISD::ST1_PRED:
7055 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7056 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7057 return getPackedVectorTypeFromPredicateType(
7058 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7059 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7060 return getPackedVectorTypeFromPredicateType(
7061 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7062 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7063 return getPackedVectorTypeFromPredicateType(
7064 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7065 default:
7066 break;
7067 }
7068
7069 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7070 return EVT();
7071
7072 switch (Root->getConstantOperandVal(1)) {
7073 default:
7074 return EVT();
7075 case Intrinsic::aarch64_sme_ldr:
7076 case Intrinsic::aarch64_sme_str:
7077 return MVT::nxv16i8;
7078 case Intrinsic::aarch64_sve_prf:
7079 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7080 // width of the predicate.
7081 return getPackedVectorTypeFromPredicateType(
7082 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7083 case Intrinsic::aarch64_sve_ld2_sret:
7084 case Intrinsic::aarch64_sve_ld2q_sret:
7085 return getPackedVectorTypeFromPredicateType(
7086 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7087 case Intrinsic::aarch64_sve_st2q:
7088 return getPackedVectorTypeFromPredicateType(
7089 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7090 case Intrinsic::aarch64_sve_ld3_sret:
7091 case Intrinsic::aarch64_sve_ld3q_sret:
7092 return getPackedVectorTypeFromPredicateType(
7093 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7094 case Intrinsic::aarch64_sve_st3q:
7095 return getPackedVectorTypeFromPredicateType(
7096 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7097 case Intrinsic::aarch64_sve_ld4_sret:
7098 case Intrinsic::aarch64_sve_ld4q_sret:
7099 return getPackedVectorTypeFromPredicateType(
7100 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7101 case Intrinsic::aarch64_sve_st4q:
7102 return getPackedVectorTypeFromPredicateType(
7103 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7104 case Intrinsic::aarch64_sve_ld1udq:
7105 case Intrinsic::aarch64_sve_st1dq:
7106 return EVT(MVT::nxv1i64);
7107 case Intrinsic::aarch64_sve_ld1uwq:
7108 case Intrinsic::aarch64_sve_st1wq:
7109 return EVT(MVT::nxv1i32);
7110 }
7111}
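// Worked example (illustrative): an aarch64_sve_ld2_sret call whose governing
// predicate (operand 2) has type nxv16i1 yields a memory VT of nxv32i8 (two
// structured nxv16i8 vectors), while aarch64_sme_ldr/str always report
// nxv16i8.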
7112
7113/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7114/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7115/// where Root is the memory access using N for its address.
7116template <int64_t Min, int64_t Max>
7117bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7118 SDValue &Base,
7119 SDValue &OffImm) {
7120 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7121 const DataLayout &DL = CurDAG->getDataLayout();
7122 const MachineFrameInfo &MFI = MF->getFrameInfo();
7123
7124 if (N.getOpcode() == ISD::FrameIndex) {
7125 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7126 // We can only encode VL scaled offsets, so only fold in frame indexes
7127 // referencing SVE objects.
7128 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7129 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7130 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7131 return true;
7132 }
7133
7134 return false;
7135 }
7136
7137 if (MemVT == EVT())
7138 return false;
7139
7140 if (N.getOpcode() != ISD::ADD)
7141 return false;
7142
7143 SDValue VScale = N.getOperand(1);
7144 if (VScale.getOpcode() != ISD::VSCALE)
7145 return false;
7146
7147 TypeSize TS = MemVT.getSizeInBits();
7148 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7149 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7150
7151 if ((MulImm % MemWidthBytes) != 0)
7152 return false;
7153
7154 int64_t Offset = MulImm / MemWidthBytes;
7155 if (Offset < Min || Offset > Max)
7156 return false;
7157
7158 Base = N.getOperand(0);
7159 if (Base.getOpcode() == ISD::FrameIndex) {
7160 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7161 // We can only encode VL scaled offsets, so only fold in frame indexes
7162 // referencing SVE objects.
7163 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7164 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7165 }
7166
7167 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7168 return true;
7169}
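// Worked example (illustrative): with MemVT == nxv4i32 (16 scaled bytes per
// vscale unit), an address of the form (add x0, (vscale 32)) folds to
// Base = x0 and OffImm = 2, provided 2 lies within [Min, Max]; a vscale
// multiplier that is not a multiple of 16 is rejected.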
7170
7171/// Select register plus register addressing mode for SVE, with scaled
7172/// offset.
7173bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7174 SDValue &Base,
7175 SDValue &Offset) {
7176 if (N.getOpcode() != ISD::ADD)
7177 return false;
7178
7179 // Process an ADD node.
7180 const SDValue LHS = N.getOperand(0);
7181 const SDValue RHS = N.getOperand(1);
7182
7183 // 8 bit data does not come with the SHL node, so it is treated
7184 // separately.
7185 if (Scale == 0) {
7186 Base = LHS;
7187 Offset = RHS;
7188 return true;
7189 }
7190
7191 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7192 int64_t ImmOff = C->getSExtValue();
7193 unsigned Size = 1 << Scale;
7194
7195 // To use the reg+reg addressing mode, the immediate must be a multiple of
7196 // the vector element's byte size.
7197 if (ImmOff % Size)
7198 return false;
7199
7200 SDLoc DL(N);
7201 Base = LHS;
7202 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7203 SDValue Ops[] = {Offset};
7204 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7205 Offset = SDValue(MI, 0);
7206 return true;
7207 }
7208
7209 // Check if the RHS is a shift node with a constant.
7210 if (RHS.getOpcode() != ISD::SHL)
7211 return false;
7212
7213 const SDValue ShiftRHS = RHS.getOperand(1);
7214 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7215 if (C->getZExtValue() == Scale) {
7216 Base = LHS;
7217 Offset = RHS.getOperand(0);
7218 return true;
7219 }
7220
7221 return false;
7222}
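// Worked example (illustrative): with Scale == 2 (word elements), an address
// of the form (add x0, (shl x1, 2)) selects Base = x0 and Offset = x1,
// i.e. the [x0, x1, lsl #2] form; with Scale == 0 the operands of any ADD are
// taken directly as Base and Offset.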
7223
7224bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7225 const AArch64TargetLowering *TLI =
7226 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7227
7228 return TLI->isAllActivePredicate(*CurDAG, N);
7229}
7230
7231bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7232 EVT VT = N.getValueType();
7233 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7234}
7235
7236bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7237 SDValue &Base, SDValue &Offset,
7238 unsigned Scale) {
7239 // Try to untangle an ADD node into a 'reg + offset'
7240 if (N.getOpcode() == ISD::ADD)
7241 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7242 int64_t ImmOff = C->getSExtValue();
7243 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7244 Base = N.getOperand(0);
7245 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7246 return true;
7247 }
7248 }
7249
7250 // By default, just match reg + 0.
7251 Base = N;
7252 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7253 return true;
7254}
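// Worked example (illustrative): with MaxSize == 15 and Scale == 1, an
// address of the form (add x0, 7) yields Base = x0 and Offset = 7; offsets
// outside the supported range fall back to Base = N, Offset = 0.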