LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element that is returned
347 /// unchanged; otherwise a REG_SEQUENCE value is returned.
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
483
484 template <unsigned RegWidth>
485 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
486 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
487 }
488 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
489
490 template<unsigned RegWidth>
491 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
492 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
493 }
494
495 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
496 unsigned Width);
497
498 bool SelectCMP_SWAP(SDNode *N);
499
500 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
501 bool Negate);
502 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
503 bool Negate);
504 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
505 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
506
507 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
508 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
509 bool AllowSaturation, SDValue &Imm);
510
511 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
512 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
513 SDValue &Offset);
514 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
515 SDValue &Offset, unsigned Scale = 1);
516
517 bool SelectAllActivePredicate(SDValue N);
518 bool SelectAnyPredicate(SDValue N);
519
520 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
521
522 template <bool MatchCBB>
523 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
524};
525
526class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
527public:
528 static char ID;
529 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
530 CodeGenOptLevel OptLevel)
532 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
533};
534} // end anonymous namespace
535
// Pass identification for the legacy pass manager; the address of ID is the
// unique key, so its value is irrelevant.
char AArch64DAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
539
540/// isIntImmediate - This method tests to see if the node is a constant
541/// operand. If so Imm will receive the 32-bit value.
542static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
544 Imm = C->getZExtValue();
545 return true;
546 }
547 return false;
548}
549
550// isIntImmediate - This method tests to see if a constant operand.
551// If so Imm will receive the value.
552static bool isIntImmediate(SDValue N, uint64_t &Imm) {
553 return isIntImmediate(N.getNode(), Imm);
554}
555
556// isOpcWithIntImmediate - This method tests to see if the node is a specific
557// opcode and that it has a immediate integer right operand.
558// If so Imm will receive the 32 bit value.
559static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
560 uint64_t &Imm) {
561 return N->getOpcode() == Opc &&
562 isIntImmediate(N->getOperand(1).getNode(), Imm);
563}
564
565// isIntImmediateEq - This method tests to see if N is a constant operand that
566// is equivalent to 'ImmExpected'.
567#ifndef NDEBUG
568static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
569 uint64_t Imm;
570 if (!isIntImmediate(N.getNode(), Imm))
571 return false;
572 return Imm == ImmExpected;
573}
574#endif
575
576bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
577 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
578 std::vector<SDValue> &OutOps) {
579 switch(ConstraintID) {
580 default:
581 llvm_unreachable("Unexpected asm memory constraint");
582 case InlineAsm::ConstraintCode::m:
583 case InlineAsm::ConstraintCode::o:
584 case InlineAsm::ConstraintCode::Q:
585 // We need to make sure that this one operand does not end up in XZR, thus
586 // require the address to be in a PointerRegClass register.
587 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
588 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
589 SDLoc dl(Op);
590 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
591 SDValue NewOp =
592 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
593 dl, Op.getValueType(),
594 Op, RC), 0);
595 OutOps.push_back(NewOp);
596 return false;
597 }
598 return true;
599}
600
601/// SelectArithImmed - Select an immediate value that can be represented as
602/// a 12-bit value shifted left by either 0 or 12. If so, return true with
603/// Val set to the 12-bit value and Shift set to the shifter operand.
604bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
605 SDValue &Shift) {
606 // This function is called from the addsub_shifted_imm ComplexPattern,
607 // which lists [imm] as the list of opcode it's interested in, however
608 // we still need to check whether the operand is actually an immediate
609 // here because the ComplexPattern opcode list is only used in
610 // root-level opcode matching.
611 if (!isa<ConstantSDNode>(N.getNode()))
612 return false;
613
614 uint64_t Immed = N.getNode()->getAsZExtVal();
615 unsigned ShiftAmt;
616
617 if (Immed >> 12 == 0) {
618 ShiftAmt = 0;
619 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
620 ShiftAmt = 12;
621 Immed = Immed >> 12;
622 } else
623 return false;
624
625 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
626 SDLoc dl(N);
627 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
628 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
629 return true;
630}
631
632/// SelectNegArithImmed - As above, but negates the value before trying to
633/// select it.
634bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
635 SDValue &Shift) {
636 // This function is called from the addsub_shifted_imm ComplexPattern,
637 // which lists [imm] as the list of opcode it's interested in, however
638 // we still need to check whether the operand is actually an immediate
639 // here because the ComplexPattern opcode list is only used in
640 // root-level opcode matching.
641 if (!isa<ConstantSDNode>(N.getNode()))
642 return false;
643
644 // The immediate operand must be a 24-bit zero-extended immediate.
645 uint64_t Immed = N.getNode()->getAsZExtVal();
646
647 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
648 // have the opposite effect on the C flag, so this pattern mustn't match under
649 // those circumstances.
650 if (Immed == 0)
651 return false;
652
653 if (N.getValueType() == MVT::i32)
654 Immed = ~((uint32_t)Immed) + 1;
655 else
656 Immed = ~Immed + 1ULL;
657 if (Immed & 0xFFFFFFFFFF000000ULL)
658 return false;
659
660 Immed &= 0xFFFFFFULL;
661 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
662 Shift);
663}
664
665/// getShiftTypeForNode - Translate a shift node to the corresponding
666/// ShiftType value.
668 switch (N.getOpcode()) {
669 default:
671 case ISD::SHL:
672 return AArch64_AM::LSL;
673 case ISD::SRL:
674 return AArch64_AM::LSR;
675 case ISD::SRA:
676 return AArch64_AM::ASR;
677 case ISD::ROTR:
678 return AArch64_AM::ROR;
679 }
680}
681
683 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
684}
685
686/// Determine whether it is worth it to fold SHL into the addressing
687/// mode.
689 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
690 // It is worth folding logical shift of up to three places.
691 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
692 if (!CSD)
693 return false;
694 unsigned ShiftVal = CSD->getZExtValue();
695 if (ShiftVal > 3)
696 return false;
697
698 // Check if this particular node is reused in any non-memory related
699 // operation. If yes, do not try to fold this node into the address
700 // computation, since the computation will be kept.
701 const SDNode *Node = V.getNode();
702 for (SDNode *UI : Node->users())
703 if (!isMemOpOrPrefetch(UI))
704 for (SDNode *UII : UI->users())
705 if (!isMemOpOrPrefetch(UII))
706 return false;
707 return true;
708}
709
710/// Determine whether it is worth to fold V into an extended register addressing
711/// mode.
712bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
713 // Trivial if we are optimizing for code size or if there is only
714 // one use of the value.
715 if (CurDAG->shouldOptForSize() || V.hasOneUse())
716 return true;
717
718 // If a subtarget has a slow shift, folding a shift into multiple loads
719 // costs additional micro-ops.
720 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
721 return false;
722
723 // Check whether we're going to emit the address arithmetic anyway because
724 // it's used by a non-address operation.
725 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
726 return true;
727 if (V.getOpcode() == ISD::ADD) {
728 const SDValue LHS = V.getOperand(0);
729 const SDValue RHS = V.getOperand(1);
730 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
731 return true;
732 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
733 return true;
734 }
735
736 // It hurts otherwise, since the value will be reused.
737 return false;
738}
739
/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted register
///
/// Rewrites an AND of a shifted value whose mask is a shifted run of ones
/// into a bitfield-move (UBFM/SBFM) plus an LSL shifter operand, so that the
/// whole expression can be folded as a "shifted register" operand.
/// On success, Reg receives the machine bitfield-move node and Shift the
/// LSL shifter-immediate; returns false if the pattern does not apply.
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  // Only fold a single-use AND whose left operand is a single-use constant
  // shift; otherwise the original values are still needed elsewhere.
  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  // The AND mask must be a contiguous run of MaskLen ones starting LowZBits
  // above bit 0.
  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA need all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  // Emit the replacement bitfield move and report an LSL #LowZBits shifter.
  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}
817
818/// getExtendTypeForNode - Translate an extend node to the corresponding
819/// ExtendType value.
821getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
822 if (N.getOpcode() == ISD::SIGN_EXTEND ||
823 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
824 EVT SrcVT;
825 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
826 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
827 else
828 SrcVT = N.getOperand(0).getValueType();
829
830 if (!IsLoadStore && SrcVT == MVT::i8)
831 return AArch64_AM::SXTB;
832 else if (!IsLoadStore && SrcVT == MVT::i16)
833 return AArch64_AM::SXTH;
834 else if (SrcVT == MVT::i32)
835 return AArch64_AM::SXTW;
836 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
837
839 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
840 N.getOpcode() == ISD::ANY_EXTEND) {
841 EVT SrcVT = N.getOperand(0).getValueType();
842 if (!IsLoadStore && SrcVT == MVT::i8)
843 return AArch64_AM::UXTB;
844 else if (!IsLoadStore && SrcVT == MVT::i16)
845 return AArch64_AM::UXTH;
846 else if (SrcVT == MVT::i32)
847 return AArch64_AM::UXTW;
848 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
849
851 } else if (N.getOpcode() == ISD::AND) {
852 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
853 if (!CSD)
855 uint64_t AndMask = CSD->getZExtValue();
856
857 switch (AndMask) {
858 default:
860 case 0xFF:
861 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
862 case 0xFFFF:
863 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
864 case 0xFFFFFFFF:
865 return AArch64_AM::UXTW;
866 }
867 }
868
870}
871
872/// Determine whether it is worth to fold V into an extended register of an
873/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
874/// instruction, and the shift should be treated as worth folding even if has
875/// multiple uses.
876bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
877 // Trivial if we are optimizing for code size or if there is only
878 // one use of the value.
879 if (CurDAG->shouldOptForSize() || V.hasOneUse())
880 return true;
881
882 // If a subtarget has a fastpath LSL we can fold a logical shift into
883 // the add/sub and save a cycle.
884 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
885 V.getConstantOperandVal(1) <= 4 &&
887 return true;
888
889 // It hurts otherwise, since the value will be reused.
890 return false;
891}
892
893/// SelectShiftedRegister - Select a "shifted register" operand. If the value
894/// is not shifted, set the Shift operand to default of "LSL 0". The logical
895/// instructions allow the shifted register to be rotated, but the arithmetic
896/// instructions do not. The AllowROR parameter specifies whether ROR is
897/// supported.
898bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
899 SDValue &Reg, SDValue &Shift) {
900 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
901 return true;
902
904 if (ShType == AArch64_AM::InvalidShiftExtend)
905 return false;
906 if (!AllowROR && ShType == AArch64_AM::ROR)
907 return false;
908
909 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
910 unsigned BitSize = N.getValueSizeInBits();
911 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
912 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
913
914 Reg = N.getOperand(0);
915 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
916 return isWorthFoldingALU(N, true);
917 }
918
919 return false;
920}
921
922/// Instructions that accept extend modifiers like UXTW expect the register
923/// being extended to be a GPR32, but the incoming DAG might be acting on a
924/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
925/// this is the case.
927 if (N.getValueType() == MVT::i32)
928 return N;
929
930 SDLoc dl(N);
931 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
932}
933
934// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
935template<signed Low, signed High, signed Scale>
936bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
938 return false;
939
940 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
941 if ((MulImm % std::abs(Scale)) == 0) {
942 int64_t RDVLImm = MulImm / Scale;
943 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
944 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
945 return true;
946 }
947 }
948
949 return false;
950}
951
952// Returns a suitable RDSVL multiplier from a left shift.
953template <signed Low, signed High>
954bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
956 return false;
957
958 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
959 if (MulImm >= Low && MulImm <= High) {
960 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
961 return true;
962 }
963
964 return false;
965}
966
967/// SelectArithExtendedRegister - Select a "extended register" operand. This
968/// operand folds in an extend followed by an optional left shift.
969bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
970 SDValue &Shift) {
971 unsigned ShiftVal = 0;
973
974 if (N.getOpcode() == ISD::SHL) {
975 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
976 if (!CSD)
977 return false;
978 ShiftVal = CSD->getZExtValue();
979 if (ShiftVal > 4)
980 return false;
981
982 Ext = getExtendTypeForNode(N.getOperand(0));
984 return false;
985
986 Reg = N.getOperand(0).getOperand(0);
987 } else {
988 Ext = getExtendTypeForNode(N);
990 return false;
991
992 Reg = N.getOperand(0);
993
994 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
995 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
996 auto isDef32 = [](SDValue N) {
997 unsigned Opc = N.getOpcode();
998 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1001 Opc != ISD::FREEZE;
1002 };
1003 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1004 isDef32(Reg))
1005 return false;
1006 }
1007
1008 // AArch64 mandates that the RHS of the operation must use the smallest
1009 // register class that could contain the size being extended from. Thus,
1010 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1011 // there might not be an actual 32-bit value in the program. We can
1012 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1013 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1014 Reg = narrowIfNeeded(CurDAG, Reg);
1015 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1016 MVT::i32);
1017 return isWorthFoldingALU(N);
1018}
1019
1020/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1021/// operand is referred by the instructions have SP operand
1022bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1023 SDValue &Shift) {
1024 unsigned ShiftVal = 0;
1026
1027 if (N.getOpcode() != ISD::SHL)
1028 return false;
1029
1030 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1031 if (!CSD)
1032 return false;
1033 ShiftVal = CSD->getZExtValue();
1034 if (ShiftVal > 4)
1035 return false;
1036
1037 Ext = AArch64_AM::UXTX;
1038 Reg = N.getOperand(0);
1039 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1040 MVT::i32);
1041 return isWorthFoldingALU(N);
1042}
1043
1044/// If there's a use of this ADDlow that's not itself a load/store then we'll
1045/// need to create a real ADD instruction from it anyway and there's no point in
1046/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1047/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1048/// leads to duplicated ADRP instructions.
1050 for (auto *User : N->users()) {
1051 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1052 User->getOpcode() != ISD::ATOMIC_LOAD &&
1053 User->getOpcode() != ISD::ATOMIC_STORE)
1054 return false;
1055
1056 // ldar and stlr have much more restrictive addressing modes (just a
1057 // register).
1058 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1059 return false;
1060 }
1061
1062 return true;
1063}
1064
1065/// Check if the immediate offset is valid as a scaled immediate.
1066static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1067 unsigned Size) {
1068 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1069 Offset < (Range << Log2_32(Size)))
1070 return true;
1071 return false;
1072}
1073
1074/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1075/// immediate" address. The "Size" argument is the size in bytes of the memory
1076/// reference, which determines the scale.
1077bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1078 unsigned BW, unsigned Size,
1079 SDValue &Base,
1080 SDValue &OffImm) {
1081 SDLoc dl(N);
1082 const DataLayout &DL = CurDAG->getDataLayout();
1083 const TargetLowering *TLI = getTargetLowering();
1084 if (N.getOpcode() == ISD::FrameIndex) {
1085 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1086 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1087 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1088 return true;
1089 }
1090
1091 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1092 // selected here doesn't support labels/immediates, only base+offset.
1093 if (CurDAG->isBaseWithConstantOffset(N)) {
1094 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1095 if (IsSignedImm) {
1096 int64_t RHSC = RHS->getSExtValue();
1097 unsigned Scale = Log2_32(Size);
1098 int64_t Range = 0x1LL << (BW - 1);
1099
1100 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1101 RHSC < (Range << Scale)) {
1102 Base = N.getOperand(0);
1103 if (Base.getOpcode() == ISD::FrameIndex) {
1104 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1105 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1106 }
1107 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1108 return true;
1109 }
1110 } else {
1111 // unsigned Immediate
1112 uint64_t RHSC = RHS->getZExtValue();
1113 unsigned Scale = Log2_32(Size);
1114 uint64_t Range = 0x1ULL << BW;
1115
1116 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1117 Base = N.getOperand(0);
1118 if (Base.getOpcode() == ISD::FrameIndex) {
1119 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1120 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1121 }
1122 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1123 return true;
1124 }
1125 }
1126 }
1127 }
1128 // Base only. The address will be materialized into a register before
1129 // the memory is accessed.
1130 // add x0, Xbase, #offset
1131 // stp x1, x2, [x0]
1132 Base = N;
1133 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1134 return true;
1135}
1136
1137/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1138/// immediate" address. The "Size" argument is the size in bytes of the memory
1139/// reference, which determines the scale.
1140bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1141 SDValue &Base, SDValue &OffImm) {
1142 SDLoc dl(N);
1143 const DataLayout &DL = CurDAG->getDataLayout();
1144 const TargetLowering *TLI = getTargetLowering();
1145 if (N.getOpcode() == ISD::FrameIndex) {
1146 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1147 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1148 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1149 return true;
1150 }
1151
1152 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1153 GlobalAddressSDNode *GAN =
1154 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1155 Base = N.getOperand(0);
1156 OffImm = N.getOperand(1);
1157 if (!GAN)
1158 return true;
1159
1160 if (GAN->getOffset() % Size == 0 &&
1162 return true;
1163 }
1164
1165 if (CurDAG->isBaseWithConstantOffset(N)) {
1166 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1167 int64_t RHSC = (int64_t)RHS->getZExtValue();
1168 unsigned Scale = Log2_32(Size);
1169 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1170 Base = N.getOperand(0);
1171 if (Base.getOpcode() == ISD::FrameIndex) {
1172 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1173 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1174 }
1175 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1176 return true;
1177 }
1178 }
1179 }
1180
1181 // Before falling back to our general case, check if the unscaled
1182 // instructions can handle this. If so, that's preferable.
1183 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1184 return false;
1185
1186 // Base only. The address will be materialized into a register before
1187 // the memory is accessed.
1188 // add x0, Xbase, #offset
1189 // ldr x0, [x0]
1190 Base = N;
1191 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1192 return true;
1193}
1194
1195/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1196/// immediate" address. This should only match when there is an offset that
1197/// is not valid for a scaled immediate addressing mode. The "Size" argument
1198/// is the size in bytes of the memory reference, which is needed here to know
1199/// what is valid for a scaled immediate.
1200bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1201 SDValue &Base,
1202 SDValue &OffImm) {
1203 if (!CurDAG->isBaseWithConstantOffset(N))
1204 return false;
1205 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1206 int64_t RHSC = RHS->getSExtValue();
1207 if (RHSC >= -256 && RHSC < 256) {
1208 Base = N.getOperand(0);
1209 if (Base.getOpcode() == ISD::FrameIndex) {
1210 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1211 const TargetLowering *TLI = getTargetLowering();
1212 Base = CurDAG->getTargetFrameIndex(
1213 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1214 }
1215 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1216 return true;
1217 }
1218 }
1219 return false;
1220}
1221
1223 SDLoc dl(N);
1224 SDValue ImpDef = SDValue(
1225 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1226 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1227 N);
1228}
1229
1230/// Check if the given SHL node (\p N), can be used to form an
1231/// extended register for an addressing mode.
1232bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1233 bool WantExtend, SDValue &Offset,
1234 SDValue &SignExtend) {
1235 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1236 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1237 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1238 return false;
1239
1240 SDLoc dl(N);
1241 if (WantExtend) {
1243 getExtendTypeForNode(N.getOperand(0), true);
1245 return false;
1246
1247 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1248 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1249 MVT::i32);
1250 } else {
1251 Offset = N.getOperand(0);
1252 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1253 }
1254
1255 unsigned LegalShiftVal = Log2_32(Size);
1256 unsigned ShiftVal = CSD->getZExtValue();
1257
1258 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1259 return false;
1260
1261 return isWorthFoldingAddr(N, Size);
1262}
1263
1264bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1266 SDValue &SignExtend,
1267 SDValue &DoShift) {
1268 if (N.getOpcode() != ISD::ADD)
1269 return false;
1270 SDValue LHS = N.getOperand(0);
1271 SDValue RHS = N.getOperand(1);
1272 SDLoc dl(N);
1273
1274 // We don't want to match immediate adds here, because they are better lowered
1275 // to the register-immediate addressing modes.
1277 return false;
1278
1279 // Check if this particular node is reused in any non-memory related
1280 // operation. If yes, do not try to fold this node into the address
1281 // computation, since the computation will be kept.
1282 const SDNode *Node = N.getNode();
1283 for (SDNode *UI : Node->users()) {
1284 if (!isMemOpOrPrefetch(UI))
1285 return false;
1286 }
1287
1288 // Remember if it is worth folding N when it produces extended register.
1289 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1290
1291 // Try to match a shifted extend on the RHS.
1292 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1293 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1294 Base = LHS;
1295 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1296 return true;
1297 }
1298
1299 // Try to match a shifted extend on the LHS.
1300 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1301 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1302 Base = RHS;
1303 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1304 return true;
1305 }
1306
1307 // There was no shift, whatever else we find.
1308 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1309
1311 // Try to match an unshifted extend on the LHS.
1312 if (IsExtendedRegisterWorthFolding &&
1313 (Ext = getExtendTypeForNode(LHS, true)) !=
1315 Base = RHS;
1316 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1317 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1318 MVT::i32);
1319 if (isWorthFoldingAddr(LHS, Size))
1320 return true;
1321 }
1322
1323 // Try to match an unshifted extend on the RHS.
1324 if (IsExtendedRegisterWorthFolding &&
1325 (Ext = getExtendTypeForNode(RHS, true)) !=
1327 Base = LHS;
1328 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1329 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1330 MVT::i32);
1331 if (isWorthFoldingAddr(RHS, Size))
1332 return true;
1333 }
1334
1335 return false;
1336}
1337
1338// Check if the given immediate is preferred by ADD. If an immediate can be
1339// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1340// encoded by one MOVZ, return true.
1341static bool isPreferredADD(int64_t ImmOff) {
1342 // Constant in [0x0, 0xfff] can be encoded in ADD.
1343 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1344 return true;
1345 // Check if it can be encoded in an "ADD LSL #12".
1346 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1347 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1348 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1349 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1350 return false;
1351}
1352
1353bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1355 SDValue &SignExtend,
1356 SDValue &DoShift) {
1357 if (N.getOpcode() != ISD::ADD)
1358 return false;
1359 SDValue LHS = N.getOperand(0);
1360 SDValue RHS = N.getOperand(1);
1361 SDLoc DL(N);
1362
1363 // Check if this particular node is reused in any non-memory related
1364 // operation. If yes, do not try to fold this node into the address
1365 // computation, since the computation will be kept.
1366 const SDNode *Node = N.getNode();
1367 for (SDNode *UI : Node->users()) {
1368 if (!isMemOpOrPrefetch(UI))
1369 return false;
1370 }
1371
1372 // Watch out if RHS is a wide immediate, it can not be selected into
1373 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1374 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1375 // instructions like:
1376 // MOV X0, WideImmediate
1377 // ADD X1, BaseReg, X0
1378 // LDR X2, [X1, 0]
1379 // For such situation, using [BaseReg, XReg] addressing mode can save one
1380 // ADD/SUB:
1381 // MOV X0, WideImmediate
1382 // LDR X2, [BaseReg, X0]
1383 if (isa<ConstantSDNode>(RHS)) {
1384 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1385 // Skip the immediate can be selected by load/store addressing mode.
1386 // Also skip the immediate can be encoded by a single ADD (SUB is also
1387 // checked by using -ImmOff).
1388 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1389 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1390 return false;
1391
1392 SDValue Ops[] = { RHS };
1393 SDNode *MOVI =
1394 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1395 SDValue MOVIV = SDValue(MOVI, 0);
1396 // This ADD of two X register will be selected into [Reg+Reg] mode.
1397 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1398 }
1399
1400 // Remember if it is worth folding N when it produces extended register.
1401 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1402
1403 // Try to match a shifted extend on the RHS.
1404 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1405 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1406 Base = LHS;
1407 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1408 return true;
1409 }
1410
1411 // Try to match a shifted extend on the LHS.
1412 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1413 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1414 Base = RHS;
1415 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1416 return true;
1417 }
1418
1419 // Match any non-shifted, non-extend, non-immediate add expression.
1420 Base = LHS;
1421 Offset = RHS;
1422 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1423 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1424 // Reg1 + Reg2 is free: no check needed.
1425 return true;
1426}
1427
1428SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1429 static const unsigned RegClassIDs[] = {
1430 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1431 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1432 AArch64::dsub2, AArch64::dsub3};
1433
1434 return createTuple(Regs, RegClassIDs, SubRegs);
1435}
1436
1437SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1438 static const unsigned RegClassIDs[] = {
1439 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1440 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1441 AArch64::qsub2, AArch64::qsub3};
1442
1443 return createTuple(Regs, RegClassIDs, SubRegs);
1444}
1445
1446SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1447 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1448 AArch64::ZPR3RegClassID,
1449 AArch64::ZPR4RegClassID};
1450 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1451 AArch64::zsub2, AArch64::zsub3};
1452
1453 return createTuple(Regs, RegClassIDs, SubRegs);
1454}
1455
1456SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1457 assert(Regs.size() == 2 || Regs.size() == 4);
1458
1459 // The createTuple interface requires 3 RegClassIDs for each possible
1460 // tuple type even though we only have them for ZPR2 and ZPR4.
1461 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1462 AArch64::ZPR4Mul4RegClassID};
1463 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1464 AArch64::zsub2, AArch64::zsub3};
1465 return createTuple(Regs, RegClassIDs, SubRegs);
1466}
1467
1468SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1469 const unsigned RegClassIDs[],
1470 const unsigned SubRegs[]) {
1471 // There's no special register-class for a vector-list of 1 element: it's just
1472 // a vector.
1473 if (Regs.size() == 1)
1474 return Regs[0];
1475
1476 assert(Regs.size() >= 2 && Regs.size() <= 4);
1477
1478 SDLoc DL(Regs[0]);
1479
1481
1482 // First operand of REG_SEQUENCE is the desired RegClass.
1483 Ops.push_back(
1484 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1485
1486 // Then we get pairs of source & subregister-position for the components.
1487 for (unsigned i = 0; i < Regs.size(); ++i) {
1488 Ops.push_back(Regs[i]);
1489 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1490 }
1491
1492 SDNode *N =
1493 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1494 return SDValue(N, 0);
1495}
1496
1497void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1498 bool isExt) {
1499 SDLoc dl(N);
1500 EVT VT = N->getValueType(0);
1501
1502 unsigned ExtOff = isExt;
1503
1504 // Form a REG_SEQUENCE to force register allocation.
1505 unsigned Vec0Off = ExtOff + 1;
1506 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1507 SDValue RegSeq = createQTuple(Regs);
1508
1510 if (isExt)
1511 Ops.push_back(N->getOperand(1));
1512 Ops.push_back(RegSeq);
1513 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1514 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1515}
1516
1517static std::tuple<SDValue, SDValue>
1519 SDLoc DL(Disc);
1520 SDValue AddrDisc;
1521 SDValue ConstDisc;
1522
1523 // If this is a blend, remember the constant and address discriminators.
1524 // Otherwise, it's either a constant discriminator, or a non-blended
1525 // address discriminator.
1526 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1527 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1528 AddrDisc = Disc->getOperand(1);
1529 ConstDisc = Disc->getOperand(2);
1530 } else {
1531 ConstDisc = Disc;
1532 }
1533
1534 // If the constant discriminator (either the blend RHS, or the entire
1535 // discriminator value) isn't a 16-bit constant, bail out, and let the
1536 // discriminator be computed separately.
1537 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1538 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1539 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1540
1541 // If there's no address discriminator, use XZR directly.
1542 if (!AddrDisc)
1543 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1544
1545 return std::make_tuple(
1546 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1547 AddrDisc);
1548}
1549
1550void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1551 SDLoc DL(N);
1552 // IntrinsicID is operand #0
1553 SDValue Val = N->getOperand(1);
1554 SDValue AUTKey = N->getOperand(2);
1555 SDValue AUTDisc = N->getOperand(3);
1556
1557 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1558 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1559
1560 SDValue AUTAddrDisc, AUTConstDisc;
1561 std::tie(AUTConstDisc, AUTAddrDisc) =
1562 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1563
1564 if (!Subtarget->isX16X17Safer()) {
1565 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1566 // Copy deactivation symbol if present.
1567 if (N->getNumOperands() > 4)
1568 Ops.push_back(N->getOperand(4));
1569
1570 SDNode *AUT =
1571 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1572 ReplaceNode(N, AUT);
1573 } else {
1574 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1575 AArch64::X16, Val, SDValue());
1576 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1577
1578 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1579 ReplaceNode(N, AUT);
1580 }
1581}
1582
1583void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1584 SDLoc DL(N);
1585 // IntrinsicID is operand #0, if W_CHAIN it is #1
1586 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1587 SDValue Val = N->getOperand(OffsetBase + 1);
1588 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1589 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1590 SDValue PACKey = N->getOperand(OffsetBase + 4);
1591 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1592 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1593 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1594
1595 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1596 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1597
1598 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1599 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1600
1601 SDValue AUTAddrDisc, AUTConstDisc;
1602 std::tie(AUTConstDisc, AUTAddrDisc) =
1603 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1604
1605 SDValue PACAddrDisc, PACConstDisc;
1606 std::tie(PACConstDisc, PACAddrDisc) =
1607 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1608
1609 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1610 AArch64::X16, Val, SDValue());
1611
1612 if (HasLoad) {
1613 SDValue Addend = N->getOperand(OffsetBase + 6);
1614 SDValue IncomingChain = N->getOperand(0);
1615 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1616 PACKey, PACConstDisc, PACAddrDisc,
1617 Addend, IncomingChain, X16Copy.getValue(1)};
1618
1619 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1620 MVT::i64, MVT::Other, Ops);
1621 ReplaceNode(N, AUTRELLOADPAC);
1622 } else {
1623 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1624 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1625
1626 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1627 ReplaceNode(N, AUTPAC);
1628 }
1629}
1630
1631bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1632 LoadSDNode *LD = cast<LoadSDNode>(N);
1633 if (LD->isUnindexed())
1634 return false;
1635 EVT VT = LD->getMemoryVT();
1636 EVT DstVT = N->getValueType(0);
1637 ISD::MemIndexedMode AM = LD->getAddressingMode();
1638 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1639 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1640 int OffsetVal = (int)OffsetOp->getZExtValue();
1641
1642 // We're not doing validity checking here. That was done when checking
1643 // if we should mark the load as indexed or not. We're just selecting
1644 // the right instruction.
1645 unsigned Opcode = 0;
1646
1647 ISD::LoadExtType ExtType = LD->getExtensionType();
1648 bool InsertTo64 = false;
1649 if (VT == MVT::i64)
1650 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1651 else if (VT == MVT::i32) {
1652 if (ExtType == ISD::NON_EXTLOAD)
1653 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1654 else if (ExtType == ISD::SEXTLOAD)
1655 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1656 else {
1657 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1658 InsertTo64 = true;
1659 // The result of the load is only i32. It's the subreg_to_reg that makes
1660 // it into an i64.
1661 DstVT = MVT::i32;
1662 }
1663 } else if (VT == MVT::i16) {
1664 if (ExtType == ISD::SEXTLOAD) {
1665 if (DstVT == MVT::i64)
1666 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1667 else
1668 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1669 } else {
1670 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1671 InsertTo64 = DstVT == MVT::i64;
1672 // The result of the load is only i32. It's the subreg_to_reg that makes
1673 // it into an i64.
1674 DstVT = MVT::i32;
1675 }
1676 } else if (VT == MVT::i8) {
1677 if (ExtType == ISD::SEXTLOAD) {
1678 if (DstVT == MVT::i64)
1679 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1680 else
1681 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1682 } else {
1683 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1684 InsertTo64 = DstVT == MVT::i64;
1685 // The result of the load is only i32. It's the subreg_to_reg that makes
1686 // it into an i64.
1687 DstVT = MVT::i32;
1688 }
1689 } else if (VT == MVT::f16) {
1690 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1691 } else if (VT == MVT::bf16) {
1692 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1693 } else if (VT == MVT::f32) {
1694 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1695 } else if (VT == MVT::f64 ||
1696 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1697 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1698 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1699 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1700 } else if (VT.is64BitVector()) {
1701 if (IsPre || OffsetVal != 8)
1702 return false;
1703 switch (VT.getScalarSizeInBits()) {
1704 case 8:
1705 Opcode = AArch64::LD1Onev8b_POST;
1706 break;
1707 case 16:
1708 Opcode = AArch64::LD1Onev4h_POST;
1709 break;
1710 case 32:
1711 Opcode = AArch64::LD1Onev2s_POST;
1712 break;
1713 case 64:
1714 Opcode = AArch64::LD1Onev1d_POST;
1715 break;
1716 default:
1717 llvm_unreachable("Expected vector element to be a power of 2");
1718 }
1719 } else if (VT.is128BitVector()) {
1720 if (IsPre || OffsetVal != 16)
1721 return false;
1722 switch (VT.getScalarSizeInBits()) {
1723 case 8:
1724 Opcode = AArch64::LD1Onev16b_POST;
1725 break;
1726 case 16:
1727 Opcode = AArch64::LD1Onev8h_POST;
1728 break;
1729 case 32:
1730 Opcode = AArch64::LD1Onev4s_POST;
1731 break;
1732 case 64:
1733 Opcode = AArch64::LD1Onev2d_POST;
1734 break;
1735 default:
1736 llvm_unreachable("Expected vector element to be a power of 2");
1737 }
1738 } else
1739 return false;
1740 SDValue Chain = LD->getChain();
1741 SDValue Base = LD->getBasePtr();
1742 SDLoc dl(N);
1743 // LD1 encodes an immediate offset by using XZR as the offset register.
1744 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1745 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1746 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1747 SDValue Ops[] = { Base, Offset, Chain };
1748 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1749 MVT::Other, Ops);
1750
1751 // Transfer memoperands.
1752 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1753 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1754
1755 // Either way, we're replacing the node, so tell the caller that.
1756 SDValue LoadedVal = SDValue(Res, 1);
1757 if (InsertTo64) {
1758 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1759 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1760 MVT::i64, LoadedVal, SubReg),
1761 0);
1762 }
1763
1764 ReplaceUses(SDValue(N, 0), LoadedVal);
1765 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1766 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1767 CurDAG->RemoveDeadNode(N);
1768 return true;
1769}
1770
1771void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1772 unsigned SubRegIdx) {
1773 SDLoc dl(N);
1774 EVT VT = N->getValueType(0);
1775 SDValue Chain = N->getOperand(0);
1776
1777 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1778 Chain};
1779
1780 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1781
1782 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1783 SDValue SuperReg = SDValue(Ld, 0);
1784 for (unsigned i = 0; i < NumVecs; ++i)
1785 ReplaceUses(SDValue(N, i),
1786 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1787
1788 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1789
1790 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1791 // because it's too simple to have needed special treatment during lowering.
1792 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1793 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1794 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1795 }
1796
1797 CurDAG->RemoveDeadNode(N);
1798}
1799
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  // Select a post-incrementing structure load. The machine node produces
  // { write-back base (i64), vector tuple (Untyped), chain }.
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    // Single-vector form: the tuple result is the vector itself.
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    // Multi-vector form: extract each element as a subregister.
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
1831
1832/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1833/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1834/// new Base and an SDValue representing the new offset.
1835std::tuple<unsigned, SDValue, SDValue>
1836AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1837 unsigned Opc_ri,
1838 const SDValue &OldBase,
1839 const SDValue &OldOffset,
1840 unsigned Scale) {
1841 SDValue NewBase = OldBase;
1842 SDValue NewOffset = OldOffset;
1843 // Detect a possible Reg+Imm addressing mode.
1844 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1845 N, OldBase, NewBase, NewOffset);
1846
1847 // Detect a possible reg+reg addressing mode, but only if we haven't already
1848 // detected a Reg+Imm one.
1849 const bool IsRegReg =
1850 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1851
1852 // Select the instruction.
1853 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1854}
1855
// Classifies the element type a multi-vector operation works on; used by
// SelectOpcodeFromVT below to validate the vector element type before
// indexing into a per-element-size opcode table.
enum class SelectTypeKind {
  Int1 = 0, // element type must be i1
  Int = 1,  // element type must be i8/i16/i32/i64
  FP = 2,   // element type must be bf16/f16/f32/f64
  // NOTE(review): an enumerator appears to have been lost in extraction
  // before this closing brace — confirm against the upstream source.
};
1862
/// This function selects an opcode from a list of opcodes, which is
/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
/// element types, in this order.
template <SelectTypeKind Kind>
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
  // Only match scalable vector VTs
  if (!VT.isScalableVector())
    return 0;

  EVT EltVT = VT.getVectorElementType();
  // Key is the minimum element count of the scalable vector; it is mapped to
  // an index into Opcodes by the second switch below.
  unsigned Key = VT.getVectorMinNumElements();
  switch (Kind) {
  // NOTE(review): the `case` labels of this switch appear to have been lost
  // in extraction — confirm this function against the upstream source.
    break;
    if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
        EltVT != MVT::i64)
      return 0;
    break;
    if (EltVT != MVT::i1)
      return 0;
    break;
  case SelectTypeKind::FP:
    // bf16 shares the opcode slot that 16 elements would otherwise select.
    if (EltVT == MVT::bf16)
      Key = 16;
    else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
             EltVT != MVT::f64)
      return 0;
    break;
  }

  unsigned Offset;
  switch (Key) {
  case 16: // 8-bit or bf16
    Offset = 0;
    break;
  case 8: // 16-bit
    Offset = 1;
    break;
  case 4: // 32-bit
    Offset = 2;
    break;
  case 2: // 64-bit
    Offset = 3;
    break;
  default:
    return 0;
  }

  // A short opcode list means "no opcode for this element size".
  return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
}
1915
1916// This function is almost identical to SelectWhilePair, but has an
1917// extra check on the range of the immediate operand.
1918// TODO: Merge these two functions together at some point?
1919void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1920 // Immediate can be either 0 or 1.
1921 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1922 if (Imm->getZExtValue() > 1)
1923 return;
1924
1925 SDLoc DL(N);
1926 EVT VT = N->getValueType(0);
1927 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1928 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1929 SDValue SuperReg = SDValue(WhilePair, 0);
1930
1931 for (unsigned I = 0; I < 2; ++I)
1932 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1933 AArch64::psub0 + I, DL, VT, SuperReg));
1934
1935 CurDAG->RemoveDeadNode(N);
1936}
1937
1938void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1939 SDLoc DL(N);
1940 EVT VT = N->getValueType(0);
1941
1942 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1943
1944 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1945 SDValue SuperReg = SDValue(WhilePair, 0);
1946
1947 for (unsigned I = 0; I < 2; ++I)
1948 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1949 AArch64::psub0 + I, DL, VT, SuperReg));
1950
1951 CurDAG->RemoveDeadNode(N);
1952}
1953
1954void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1955 unsigned Opcode) {
1956 EVT VT = N->getValueType(0);
1957 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1958 SDValue Ops = createZTuple(Regs);
1959 SDLoc DL(N);
1960 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1961 SDValue SuperReg = SDValue(Intrinsic, 0);
1962 for (unsigned i = 0; i < NumVecs; ++i)
1963 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1964 AArch64::zsub0 + i, DL, VT, SuperReg));
1965
1966 CurDAG->RemoveDeadNode(N);
1967}
1968
void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
                                                unsigned Opcode) {
  // Select a chained FP8 convert: the machine node returns NumVecs vectors in
  // one untyped tuple plus a chain.
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // Skip the chain (operand 0) and the intrinsic ID (operand 1); the chain
  // goes last on the machine node.
  SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
  Ops.push_back(/*Chain*/ N->getOperand(0));

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  // Extract each result vector from the untyped super-register.
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N);
}
1989
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
                                                          unsigned NumVecs,
                                                          bool IsZmMulti,
                                                          unsigned Opcode,
                                                          bool HasPred) {
  assert(Opcode != 0 && "Unexpected opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID

  // Pulls the next NumVecs operands off the iterator and packs them into a
  // single multi-vector (Z-register tuple) operand.
  auto GetMultiVecOperand = [&]() {
    SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
    OpsIter += NumVecs;
    return createZMulTuple(Regs);
  };

  // NOTE(review): the declaration of `Ops` appears to have been lost in
  // extraction here — confirm against the upstream source.
  if (HasPred)
    Ops.push_back(*OpsIter++);

  // First (destructive) source is always a multi-vector tuple.
  Ops.push_back(GetMultiVecOperand());
  // Second source is either another multi-vector tuple or a single register.
  if (IsZmMulti)
    Ops.push_back(GetMultiVecOperand());
  else
    Ops.push_back(*OpsIter++);

  // Append any remaining operands.
  Ops.append(OpsIter, N->op_end());
  SDNode *Intrinsic;
  Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  // Unpack the untyped tuple result into the intrinsic's NumVecs results.
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}
2028
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 5 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  // Optimize addressing mode.
  // NOTE(review): the declaration of `Base` and `Offset` appears to have been
  // lost in extraction here — confirm against the upstream source.
  unsigned Opc;
  // Intrinsic form carries the ID, shifting the operand positions by one.
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
                   Base,                          // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  // Unpack each loaded vector from the untyped tuple result.
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}
2061
void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
                                                          unsigned NumVecs,
                                                          unsigned Scale,
                                                          unsigned Opc_ri,
                                                          unsigned Opc_rr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue PNg = N->getOperand(2);
  SDValue Base = N->getOperand(3);
  SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
  unsigned Opc;
  // Fold the address computation, picking reg+imm or reg+reg form.
  std::tie(Opc, Base, Offset) =
      findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);

  SDValue Ops[] = {PNg,  // Predicate-as-counter
                   Base, // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  // Unpack each loaded vector from the untyped tuple result.
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}
2096
2097void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2098 unsigned Opcode) {
2099 if (N->getValueType(0) != MVT::nxv4f32)
2100 return;
2101 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2102}
2103
void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
                                                    unsigned NumOutVecs,
                                                    unsigned Opc,
                                                    uint32_t MaxImm) {
  // Reject lane immediates outside the instruction's encodable range.
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
    if (Imm->getZExtValue() > MaxImm)
      return;

  // The lookup-table operand must map to the ZT0 register.
  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
    return;

  SDValue Chain = Node->getOperand(0);
  // { ZT0, source vector, lane immediate, chain }
  SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  // Unpack each result vector from the untyped tuple result.
  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
                                      AArch64::zsub0 + I, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(Node);
}
2134
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                unsigned NumOutVecs,
                                                unsigned Opc) {
  // The lookup-table operand must map to the ZT0 register.
  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
    return;

  SDValue Chain = Node->getOperand(0);
  // { ZT0, two source vectors packed into a multi-vector tuple, chain }
  SDValue Ops[] = {ZtValue,
                   createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
                   Chain};

  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  // Unpack each result vector from the untyped tuple result.
  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
                                      AArch64::zsub0 + I, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(Node);
}
2163
2164void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2165 unsigned Op) {
2166 SDLoc DL(N);
2167 EVT VT = N->getValueType(0);
2168
2169 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2170 SDValue Zd = createZMulTuple(Regs);
2171 SDValue Zn = N->getOperand(1 + NumVecs);
2172 SDValue Zm = N->getOperand(2 + NumVecs);
2173
2174 SDValue Ops[] = {Zd, Zn, Zm};
2175
2176 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2177 SDValue SuperReg = SDValue(Intrinsic, 0);
2178 for (unsigned i = 0; i < NumVecs; ++i)
2179 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2180 AArch64::zsub0 + i, DL, VT, SuperReg));
2181
2182 CurDAG->RemoveDeadNode(N);
2183}
2184
2185bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2186 switch (BaseReg) {
2187 default:
2188 return false;
2189 case AArch64::ZA:
2190 case AArch64::ZAB0:
2191 if (TileNum == 0)
2192 break;
2193 return false;
2194 case AArch64::ZAH0:
2195 if (TileNum <= 1)
2196 break;
2197 return false;
2198 case AArch64::ZAS0:
2199 if (TileNum <= 3)
2200 break;
2201 return false;
2202 case AArch64::ZAD0:
2203 if (TileNum <= 7)
2204 break;
2205 return false;
2206 }
2207
2208 BaseReg += TileNum;
2209 return true;
2210}
2211
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  // For tile forms the tile number is an explicit operand; the plain-ZA form
  // carries no tile operand.
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(2);

  // Resolve BaseReg to the concrete tile register; bail out on an invalid
  // tile number for this base register.
  if (!SelectSMETile(BaseReg, TileNum))
    return;

  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(2);
  else
    SliceBase = N->getOperand(3);

  // Split the slice index into base register plus immediate offset.
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  // Unpack each moved vector from the untyped tuple result.
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}
2246
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                                                 unsigned Op, unsigned MaxIdx,
                                                 unsigned Scale, unsigned BaseReg) {
  // Slice can be in different positions
  // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
  // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
  SDValue SliceBase = N->getOperand(2);
  if (BaseReg != AArch64::ZA)
    SliceBase = N->getOperand(3);

  // NOTE(review): the declaration of `Base` and `Offset` appears to have been
  // lost in extraction here — confirm against the upstream source.
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;
  // The correct Za tile number is computed in Machine Instruction
  // See EmitZAInstr
  // DAG cannot select Za tile as an output register with ZReg
  SDLoc DL(N);
  // NOTE(review): the declaration of `Ops` appears to have been lost in
  // extraction here — confirm against the upstream source.
  if (BaseReg != AArch64::ZA )
    Ops.push_back(N->getOperand(2));
  Ops.push_back(Base);
  Ops.push_back(Offset);
  Ops.push_back(N->getOperand(0)); //Chain
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  // Unpack each read vector from the untyped tuple result.
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}
2283
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
                                                    unsigned NumOutVecs,
                                                    bool IsTupleInput,
                                                    unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // Everything after the intrinsic ID is an input vector.
  unsigned NumInVecs = N->getNumOperands() - 1;

  // NOTE(review): the declaration of `Ops` appears to have been lost in
  // extraction here — confirm against the upstream source.
  if (IsTupleInput) {
    assert((NumInVecs == 2 || NumInVecs == 4) &&
           "Don't know how to handle multi-register input!");
    // Pack the inputs into a single multi-vector tuple operand.
    SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
    Ops.push_back(createZMulTuple(Regs));
  } else {
    // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
    for (unsigned I = 0; I < NumInVecs; I++)
      Ops.push_back(N->getOperand(1 + I));
  }

  SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Res, 0);

  // Unpack the untyped tuple result into the intrinsic's results.
  for (unsigned I = 0; I < NumOutVecs; I++)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + I, DL, VT, SuperReg));
  CurDAG->RemoveDeadNode(N);
}
2312
2313void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2314 unsigned Opc) {
2315 SDLoc dl(N);
2316 EVT VT = N->getOperand(2)->getValueType(0);
2317
2318 // Form a REG_SEQUENCE to force register allocation.
2319 bool Is128Bit = VT.getSizeInBits() == 128;
2320 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2321 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2322
2323 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2324 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2325
2326 // Transfer memoperands.
2327 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2328 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2329
2330 ReplaceNode(N, St);
2331}
2332
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
  unsigned Opc;
  // NOTE(review): the declaration of `Base` and `Offset` appears to have been
  // lost in extraction here — confirm against the upstream source.
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
                   Base,                               // address
                   Offset,                             // offset
                   N->getOperand(0)};                  // chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  ReplaceNode(N, St);
}
2357
2358bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2359 SDValue &OffImm) {
2360 SDLoc dl(N);
2361 const DataLayout &DL = CurDAG->getDataLayout();
2362 const TargetLowering *TLI = getTargetLowering();
2363
2364 // Try to match it for the frame address
2365 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2366 int FI = FINode->getIndex();
2367 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2368 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2369 return true;
2370 }
2371
2372 return false;
2373}
2374
2375void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2376 unsigned Opc) {
2377 SDLoc dl(N);
2378 EVT VT = N->getOperand(2)->getValueType(0);
2379 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2380 MVT::Other}; // Type for the Chain
2381
2382 // Form a REG_SEQUENCE to force register allocation.
2383 bool Is128Bit = VT.getSizeInBits() == 128;
2384 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2385 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2386
2387 SDValue Ops[] = {RegSeq,
2388 N->getOperand(NumVecs + 1), // base register
2389 N->getOperand(NumVecs + 2), // Incremental
2390 N->getOperand(0)}; // Chain
2391 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2392
2393 ReplaceNode(N, St);
2394}
2395
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  /// Returns a wide vector whose dsub subregister is \p V64Reg; the upper
  /// half is left undefined.
  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    // Same element type, twice the element count.
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    // IMPLICIT_DEF supplies the undefined wide value to insert into.
    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace
2418
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
// NOTE(review): the function signature line appears to have been lost in
// extraction here (expected something like
// `static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {`) —
// confirm against the upstream source.
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  // Same element type, half the element count.
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  // dsub extracts the low 64-bit half of the 128-bit register.
  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}
2430
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit vectors are widened so the tuple can use the Q register class.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  // { vector tuple, lane number, address, chain }
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  // Extract each (possibly widened) vector; narrow back for 64-bit results.
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  // Chain result follows the vector results.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}
2468
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit vectors are widened so the tuple can use the Q register class.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),          // Base register
                   N->getOperand(NumVecs + 3),          // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // Single-vector form: use the result directly, narrowing if needed.
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    // Extract each (possibly widened) vector; narrow back for 64-bit results.
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
2522
2523void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2524 unsigned Opc) {
2525 SDLoc dl(N);
2526 EVT VT = N->getOperand(2)->getValueType(0);
2527 bool Narrow = VT.getSizeInBits() == 64;
2528
2529 // Form a REG_SEQUENCE to force register allocation.
2530 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2531
2532 if (Narrow)
2533 transform(Regs, Regs.begin(),
2534 WidenVector(*CurDAG));
2535
2536 SDValue RegSeq = createQTuple(Regs);
2537
2538 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2539
2540 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2541 N->getOperand(NumVecs + 3), N->getOperand(0)};
2542 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2543
2544 // Transfer memoperands.
2545 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2546 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2547
2548 ReplaceNode(N, St);
2549}
2550
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  // 64-bit vectors are widened so the tuple can use the Q register class.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}
2583
                                      unsigned &Opc, SDValue &Opd0,
                                      unsigned &LSB, unsigned &MSB,
                                      unsigned NumberOfIgnoredLowBits,
                                      bool BiggerPattern) {
  // NOTE(review): the first line of this function's signature appears to have
  // been lost in extraction — confirm against the upstream source.
  assert(N->getOpcode() == ISD::AND &&
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that
  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (AndImm & (AndImm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t SrlImm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             // NOTE(review): part of this condition appears to have been lost
             // in extraction here — confirm against the upstream source.
                                   SrlImm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
    Opd0 = Op0->getOperand(0);
    // For i32 the extract must not read past bit 31.
    ClampMSB = (VT == MVT::i32);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  // The field starts at the shift amount; its width is the number of
  // trailing ones in the (restored) mask.
  LSB = SrlImm;
  MSB = SrlImm +
        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
                        : llvm::countr_one<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead of
    // the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}
2677
                                          SDValue &Opd0, unsigned &Immr,
                                          unsigned &Imms) {
  // NOTE(review): the first line of this function's signature appears to have
  // been lost in extraction — confirm against the upstream source.
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  // Look through a truncate and operate on the wider pre-truncate value.
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  // Only a constant right shift (logical or arithmetic) can fold into SBFM.
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  // The extracted field must lie entirely within the value's bit width.
  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}
2710
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // NOTE(review): the first line of this function's signature appears to have
  // been lost in extraction — confirm against the upstream source.
  // We are looking for the following pattern which basically extracts several
  // continuous bits from the source value and places it from the LSB of the
  // destination value, all other bits of the destination value or set to zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  // The shifted value must be an AND with a constant mask.
  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  // The shift amount must also be a constant.
  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have several bits extract here.
  if (!isMask_64(AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  LSB = SrlImm;
  MSB = llvm::Log2_64(AndMask);
  return true;
}
2750
2751static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2752 unsigned &Immr, unsigned &Imms,
2753 bool BiggerPattern) {
2754 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2755 "N must be a SHR/SRA operation to call this function");
2756
2757 EVT VT = N->getValueType(0);
2758
2759 // Here we can test the type of VT and return false when the type does not
2760 // match, but since it is done prior to that call in the current context
2761 // we turned that into an assert to avoid redundant code.
2762 assert((VT == MVT::i32 || VT == MVT::i64) &&
2763 "Type checking must have been done before calling this function");
2764
2765 // Check for AND + SRL doing several bits extract.
2766 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2767 return true;
2768
2769 // We're looking for a shift of a shift.
2770 uint64_t ShlImm = 0;
2771 uint64_t TruncBits = 0;
2772 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2773 Opd0 = N->getOperand(0).getOperand(0);
2774 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2775 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2776 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2777 // be considered as setting high 32 bits as zero. Our strategy here is to
2778 // always generate 64bit UBFM. This consistency will help the CSE pass
2779 // later find more redundancy.
2780 Opd0 = N->getOperand(0).getOperand(0);
2781 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2782 VT = Opd0.getValueType();
2783 assert(VT == MVT::i64 && "the promoted type should be i64");
2784 } else if (BiggerPattern) {
2785 // Let's pretend a 0 shift left has been performed.
2786 // FIXME: Currently we limit this to the bigger pattern case,
2787 // because some optimizations expect AND and not UBFM
2788 Opd0 = N->getOperand(0);
2789 } else
2790 return false;
2791
2792 // Missing combines/constant folding may have left us with strange
2793 // constants.
2794 if (ShlImm >= VT.getSizeInBits()) {
2795 LLVM_DEBUG(
2796 (dbgs() << N
2797 << ": Found large shift immediate, this should not happen\n"));
2798 return false;
2799 }
2800
2801 uint64_t SrlImm = 0;
2802 if (!isIntImmediate(N->getOperand(1), SrlImm))
2803 return false;
2804
2805 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2806 "bad amount in shift node!");
2807 int immr = SrlImm - ShlImm;
2808 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2809 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2810 // SRA requires a signed extraction
2811 if (VT == MVT::i32)
2812 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2813 else
2814 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2815 return true;
2816}
2817
2818bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2819 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2820
2821 EVT VT = N->getValueType(0);
2822 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2823 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2824 return false;
2825
2826 uint64_t ShiftImm;
2827 SDValue Op = N->getOperand(0);
2828 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2829 return false;
2830
2831 SDLoc dl(N);
2832 // Extend the incoming operand of the shift to 64-bits.
2833 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2834 unsigned Immr = ShiftImm;
2835 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2836 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2837 CurDAG->getTargetConstant(Imms, dl, VT)};
2838 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2839 return true;
2840}
2841
2842static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2843 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2844 unsigned NumberOfIgnoredLowBits = 0,
2845 bool BiggerPattern = false) {
2846 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2847 return false;
2848
2849 switch (N->getOpcode()) {
2850 default:
2851 if (!N->isMachineOpcode())
2852 return false;
2853 break;
2854 case ISD::AND:
2855 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2856 NumberOfIgnoredLowBits, BiggerPattern);
2857 case ISD::SRL:
2858 case ISD::SRA:
2859 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2860
2862 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2863 }
2864
2865 unsigned NOpc = N->getMachineOpcode();
2866 switch (NOpc) {
2867 default:
2868 return false;
2869 case AArch64::SBFMWri:
2870 case AArch64::UBFMWri:
2871 case AArch64::SBFMXri:
2872 case AArch64::UBFMXri:
2873 Opc = NOpc;
2874 Opd0 = N->getOperand(0);
2875 Immr = N->getConstantOperandVal(1);
2876 Imms = N->getConstantOperandVal(2);
2877 return true;
2878 }
2879 // Unreachable
2880 return false;
2881}
2882
2883bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2884 unsigned Opc, Immr, Imms;
2885 SDValue Opd0;
2886 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2887 return false;
2888
2889 EVT VT = N->getValueType(0);
2890 SDLoc dl(N);
2891
2892 // If the bit extract operation is 64bit but the original type is 32bit, we
2893 // need to add one EXTRACT_SUBREG.
2894 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2895 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2896 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2897
2898 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2899 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2900 MVT::i32, SDValue(BFM, 0));
2901 ReplaceNode(N, Inner.getNode());
2902 return true;
2903 }
2904
2905 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2906 CurDAG->getTargetConstant(Imms, dl, VT)};
2907 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2908 return true;
2909}
2910
2911/// Does DstMask form a complementary pair with the mask provided by
2912/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2913/// this asks whether DstMask zeroes precisely those bits that will be set by
2914/// the other half.
2915static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2916 unsigned NumberOfIgnoredHighBits, EVT VT) {
2917 assert((VT == MVT::i32 || VT == MVT::i64) &&
2918 "i32 or i64 mask type expected!");
2919 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2920
2921 // Enable implicitTrunc as we're intentionally ignoring high bits.
2922 APInt SignificantDstMask =
2923 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2924 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2925
2926 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2927 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2928}
2929
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for useful bit of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, x useful bits are 0x7, then the useful bits of x, live through
// y.
// After #2, the useful bits of x are 0x4.
// However, if x is used on an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
2945static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2946
2948 unsigned Depth) {
2949 uint64_t Imm =
2950 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2951 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2952 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2953 getUsefulBits(Op, UsefulBits, Depth + 1);
2954}
2955
2957 uint64_t Imm, uint64_t MSB,
2958 unsigned Depth) {
2959 // inherit the bitwidth value
2960 APInt OpUsefulBits(UsefulBits);
2961 OpUsefulBits = 1;
2962
2963 if (MSB >= Imm) {
2964 OpUsefulBits <<= MSB - Imm + 1;
2965 --OpUsefulBits;
2966 // The interesting part will be in the lower part of the result
2967 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2968 // The interesting part was starting at Imm in the argument
2969 OpUsefulBits <<= Imm;
2970 } else {
2971 OpUsefulBits <<= MSB + 1;
2972 --OpUsefulBits;
2973 // The interesting part will be shifted in the result
2974 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2975 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2976 // The interesting part was at zero in the argument
2977 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2978 }
2979
2980 UsefulBits &= OpUsefulBits;
2981}
2982
2983static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2984 unsigned Depth) {
2985 uint64_t Imm =
2986 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2987 uint64_t MSB =
2988 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2989
2990 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2991}
2992
2994 unsigned Depth) {
2995 uint64_t ShiftTypeAndValue =
2996 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2997 APInt Mask(UsefulBits);
2998 Mask.clearAllBits();
2999 Mask.flipAllBits();
3000
3001 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3002 // Shift Left
3003 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3004 Mask <<= ShiftAmt;
3005 getUsefulBits(Op, Mask, Depth + 1);
3006 Mask.lshrInPlace(ShiftAmt);
3007 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3008 // Shift Right
3009 // We do not handle AArch64_AM::ASR, because the sign will change the
3010 // number of useful bits
3011 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3012 Mask.lshrInPlace(ShiftAmt);
3013 getUsefulBits(Op, Mask, Depth + 1);
3014 Mask <<= ShiftAmt;
3015 } else
3016 return;
3017
3018 UsefulBits &= Mask;
3019}
3020
3021static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3022 unsigned Depth) {
3023 uint64_t Imm =
3024 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3025 uint64_t MSB =
3026 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3027
3028 APInt OpUsefulBits(UsefulBits);
3029 OpUsefulBits = 1;
3030
3031 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3032 ResultUsefulBits.flipAllBits();
3033 APInt Mask(UsefulBits.getBitWidth(), 0);
3034
3035 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3036
3037 if (MSB >= Imm) {
3038 // The instruction is a BFXIL.
3039 uint64_t Width = MSB - Imm + 1;
3040 uint64_t LSB = Imm;
3041
3042 OpUsefulBits <<= Width;
3043 --OpUsefulBits;
3044
3045 if (Op.getOperand(1) == Orig) {
3046 // Copy the low bits from the result to bits starting from LSB.
3047 Mask = ResultUsefulBits & OpUsefulBits;
3048 Mask <<= LSB;
3049 }
3050
3051 if (Op.getOperand(0) == Orig)
3052 // Bits starting from LSB in the input contribute to the result.
3053 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3054 } else {
3055 // The instruction is a BFI.
3056 uint64_t Width = MSB + 1;
3057 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3058
3059 OpUsefulBits <<= Width;
3060 --OpUsefulBits;
3061 OpUsefulBits <<= LSB;
3062
3063 if (Op.getOperand(1) == Orig) {
3064 // Copy the bits from the result to the zero bits.
3065 Mask = ResultUsefulBits & OpUsefulBits;
3066 Mask.lshrInPlace(LSB);
3067 }
3068
3069 if (Op.getOperand(0) == Orig)
3070 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3071 }
3072
3073 UsefulBits &= Mask;
3074}
3075
3076static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3077 SDValue Orig, unsigned Depth) {
3078
3079 // Users of this node should have already been instruction selected
3080 // FIXME: Can we turn that into an assert?
3081 if (!UserNode->isMachineOpcode())
3082 return;
3083
3084 switch (UserNode->getMachineOpcode()) {
3085 default:
3086 return;
3087 case AArch64::ANDSWri:
3088 case AArch64::ANDSXri:
3089 case AArch64::ANDWri:
3090 case AArch64::ANDXri:
3091 // We increment Depth only when we call the getUsefulBits
3092 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3093 Depth);
3094 case AArch64::UBFMWri:
3095 case AArch64::UBFMXri:
3096 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3097
3098 case AArch64::ORRWrs:
3099 case AArch64::ORRXrs:
3100 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3101 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3102 Depth);
3103 return;
3104 case AArch64::BFMWri:
3105 case AArch64::BFMXri:
3106 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3107
3108 case AArch64::STRBBui:
3109 case AArch64::STURBBi:
3110 if (UserNode->getOperand(0) != Orig)
3111 return;
3112 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3113 return;
3114
3115 case AArch64::STRHHui:
3116 case AArch64::STURHHi:
3117 if (UserNode->getOperand(0) != Orig)
3118 return;
3119 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3120 return;
3121 }
3122}
3123
3124static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3126 return;
3127 // Initialize UsefulBits
3128 if (!Depth) {
3129 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3130 // At the beginning, assume every produced bits is useful
3131 UsefulBits = APInt(Bitwidth, 0);
3132 UsefulBits.flipAllBits();
3133 }
3134 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3135
3136 for (SDNode *Node : Op.getNode()->users()) {
3137 // A use cannot produce useful bits
3138 APInt UsefulBitsForUse = APInt(UsefulBits);
3139 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3140 UsersUsefulBits |= UsefulBitsForUse;
3141 }
3142 // UsefulBits contains the produced bits that are meaningful for the
3143 // current definition, thus a user cannot make a bit meaningful at
3144 // this point
3145 UsefulBits &= UsersUsefulBits;
3146}
3147
3148/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3149/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3150/// 0, return Op unchanged.
3151static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3152 if (ShlAmount == 0)
3153 return Op;
3154
3155 EVT VT = Op.getValueType();
3156 SDLoc dl(Op);
3157 unsigned BitWidth = VT.getSizeInBits();
3158 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3159
3160 SDNode *ShiftNode;
3161 if (ShlAmount > 0) {
3162 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3163 ShiftNode = CurDAG->getMachineNode(
3164 UBFMOpc, dl, VT, Op,
3165 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3166 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3167 } else {
3168 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3169 assert(ShlAmount < 0 && "expected right shift");
3170 int ShrAmount = -ShlAmount;
3171 ShiftNode = CurDAG->getMachineNode(
3172 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3173 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3174 }
3175
3176 return SDValue(ShiftNode, 0);
3177}
3178
3179// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3180static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3181 bool BiggerPattern,
3182 const uint64_t NonZeroBits,
3183 SDValue &Src, int &DstLSB,
3184 int &Width);
3185
3186// For bit-field-positioning pattern "shl VAL, N)".
3187static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3188 bool BiggerPattern,
3189 const uint64_t NonZeroBits,
3190 SDValue &Src, int &DstLSB,
3191 int &Width);
3192
3193/// Does this tree qualify as an attempt to move a bitfield into position,
3194/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3196 bool BiggerPattern, SDValue &Src,
3197 int &DstLSB, int &Width) {
3198 EVT VT = Op.getValueType();
3199 unsigned BitWidth = VT.getSizeInBits();
3200 (void)BitWidth;
3201 assert(BitWidth == 32 || BitWidth == 64);
3202
3203 KnownBits Known = CurDAG->computeKnownBits(Op);
3204
3205 // Non-zero in the sense that they're not provably zero, which is the key
3206 // point if we want to use this value
3207 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3208 if (!isShiftedMask_64(NonZeroBits))
3209 return false;
3210
3211 switch (Op.getOpcode()) {
3212 default:
3213 break;
3214 case ISD::AND:
3215 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3216 NonZeroBits, Src, DstLSB, Width);
3217 case ISD::SHL:
3218 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3219 NonZeroBits, Src, DstLSB, Width);
3220 }
3221
3222 return false;
3223}
3224
3226 bool BiggerPattern,
3227 const uint64_t NonZeroBits,
3228 SDValue &Src, int &DstLSB,
3229 int &Width) {
3230 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3231
3232 EVT VT = Op.getValueType();
3233 assert((VT == MVT::i32 || VT == MVT::i64) &&
3234 "Caller guarantees VT is one of i32 or i64");
3235 (void)VT;
3236
3237 uint64_t AndImm;
3238 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3239 return false;
3240
3241 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3242 // 1) (AndImm & (1 << POS) == 0)
3243 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3244 //
3245 // 1) and 2) don't agree so something must be wrong (e.g., in
3246 // 'SelectionDAG::computeKnownBits')
3247 assert((~AndImm & NonZeroBits) == 0 &&
3248 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3249
3250 SDValue AndOp0 = Op.getOperand(0);
3251
3252 uint64_t ShlImm;
3253 SDValue ShlOp0;
3254 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3255 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3256 ShlOp0 = AndOp0.getOperand(0);
3257 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3259 ShlImm)) {
3260 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3261
3262 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3263 SDValue ShlVal = AndOp0.getOperand(0);
3264
3265 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3266 // expect VT to be MVT::i32.
3267 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3268
3269 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3270 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3271 } else
3272 return false;
3273
3274 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3275 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3276 // AndOp0+AND.
3277 if (!BiggerPattern && !AndOp0.hasOneUse())
3278 return false;
3279
3280 DstLSB = llvm::countr_zero(NonZeroBits);
3281 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3282
3283 // Bail out on large Width. This happens when no proper combining / constant
3284 // folding was performed.
3285 if (Width >= (int)VT.getSizeInBits()) {
3286 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3287 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3288 // "val".
3289 // If VT is i32, what Width >= 32 means:
3290 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3291 // demands at least 'Width' bits (after dag-combiner). This together with
3292 // `any_extend` Op (undefined higher bits) indicates missed combination
3293 // when lowering the 'and' IR instruction to an machine IR instruction.
3294 LLVM_DEBUG(
3295 dbgs()
3296 << "Found large Width in bit-field-positioning -- this indicates no "
3297 "proper combining / constant folding was performed\n");
3298 return false;
3299 }
3300
3301 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3302 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3303 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3304 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3305 // which case it is not profitable to insert an extra shift.
3306 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3307 return false;
3308
3309 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3310 return true;
3311}
3312
3313// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3314// UBFIZ.
3316 SDValue &Src, int &DstLSB,
3317 int &Width) {
3318 // Caller should have verified that N is a left shift with constant shift
3319 // amount; asserts that.
3320 assert(Op.getOpcode() == ISD::SHL &&
3321 "Op.getNode() should be a SHL node to call this function");
3322 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3323 "Op.getNode() should shift ShlImm to call this function");
3324
3325 uint64_t AndImm = 0;
3326 SDValue Op0 = Op.getOperand(0);
3327 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3328 return false;
3329
3330 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3331 if (isMask_64(ShiftedAndImm)) {
3332 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3333 // should end with Mask, and could be prefixed with random bits if those
3334 // bits are shifted out.
3335 //
3336 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3337 // the AND result corresponding to those bits are shifted out, so it's fine
3338 // to not extract them.
3339 Width = llvm::countr_one(ShiftedAndImm);
3340 DstLSB = ShlImm;
3341 Src = Op0.getOperand(0);
3342 return true;
3343 }
3344 return false;
3345}
3346
3348 bool BiggerPattern,
3349 const uint64_t NonZeroBits,
3350 SDValue &Src, int &DstLSB,
3351 int &Width) {
3352 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3353
3354 EVT VT = Op.getValueType();
3355 assert((VT == MVT::i32 || VT == MVT::i64) &&
3356 "Caller guarantees that type is i32 or i64");
3357 (void)VT;
3358
3359 uint64_t ShlImm;
3360 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3361 return false;
3362
3363 if (!BiggerPattern && !Op.hasOneUse())
3364 return false;
3365
3366 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3367 return true;
3368
3369 DstLSB = llvm::countr_zero(NonZeroBits);
3370 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3371
3372 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3373 return false;
3374
3375 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3376 return true;
3377}
3378
3379static bool isShiftedMask(uint64_t Mask, EVT VT) {
3380 assert(VT == MVT::i32 || VT == MVT::i64);
3381 if (VT == MVT::i32)
3382 return isShiftedMask_32(Mask);
3383 return isShiftedMask_64(Mask);
3384}
3385
3386// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3387// inserted only sets known zero bits.
3389 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3390
3391 EVT VT = N->getValueType(0);
3392 if (VT != MVT::i32 && VT != MVT::i64)
3393 return false;
3394
3395 unsigned BitWidth = VT.getSizeInBits();
3396
3397 uint64_t OrImm;
3398 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3399 return false;
3400
3401 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3402 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3403 // performance neutral.
3405 return false;
3406
3407 uint64_t MaskImm;
3408 SDValue And = N->getOperand(0);
3409 // Must be a single use AND with an immediate operand.
3410 if (!And.hasOneUse() ||
3411 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3412 return false;
3413
3414 // Compute the Known Zero for the AND as this allows us to catch more general
3415 // cases than just looking for AND with imm.
3416 KnownBits Known = CurDAG->computeKnownBits(And);
3417
3418 // Non-zero in the sense that they're not provably zero, which is the key
3419 // point if we want to use this value.
3420 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3421
3422 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3423 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3424 return false;
3425
3426 // The bits being inserted must only set those bits that are known to be zero.
3427 if ((OrImm & NotKnownZero) != 0) {
3428 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3429 // currently handle this case.
3430 return false;
3431 }
3432
3433 // BFI/BFXIL dst, src, #lsb, #width.
3434 int LSB = llvm::countr_one(NotKnownZero);
3435 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3436
3437 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3438 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3439 unsigned ImmS = Width - 1;
3440
3441 // If we're creating a BFI instruction avoid cases where we need more
3442 // instructions to materialize the BFI constant as compared to the original
3443 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3444 // should be no worse in this case.
3445 bool IsBFI = LSB != 0;
3446 uint64_t BFIImm = OrImm >> LSB;
3447 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3448 // We have a BFI instruction and we know the constant can't be materialized
3449 // with a ORR-immediate with the zero register.
3450 unsigned OrChunks = 0, BFIChunks = 0;
3451 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3452 if (((OrImm >> Shift) & 0xFFFF) != 0)
3453 ++OrChunks;
3454 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3455 ++BFIChunks;
3456 }
3457 if (BFIChunks > OrChunks)
3458 return false;
3459 }
3460
3461 // Materialize the constant to be inserted.
3462 SDLoc DL(N);
3463 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3464 SDNode *MOVI = CurDAG->getMachineNode(
3465 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3466
3467 // Create the BFI/BFXIL instruction.
3468 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3469 CurDAG->getTargetConstant(ImmR, DL, VT),
3470 CurDAG->getTargetConstant(ImmS, DL, VT)};
3471 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3472 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3473 return true;
3474}
3475
3477 SDValue &ShiftedOperand,
3478 uint64_t &EncodedShiftImm) {
3479 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3480 if (!Dst.hasOneUse())
3481 return false;
3482
3483 EVT VT = Dst.getValueType();
3484 assert((VT == MVT::i32 || VT == MVT::i64) &&
3485 "Caller should guarantee that VT is one of i32 or i64");
3486 const unsigned SizeInBits = VT.getSizeInBits();
3487
3488 SDLoc DL(Dst.getNode());
3489 uint64_t AndImm, ShlImm;
3490 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3491 isShiftedMask_64(AndImm)) {
3492 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3493 SDValue DstOp0 = Dst.getOperand(0);
3494 if (!DstOp0.hasOneUse())
3495 return false;
3496
3497 // An example to illustrate the transformation
3498 // From:
3499 // lsr x8, x1, #1
3500 // and x8, x8, #0x3f80
3501 // bfxil x8, x1, #0, #7
3502 // To:
3503 // and x8, x23, #0x7f
3504 // ubfx x9, x23, #8, #7
3505 // orr x23, x8, x9, lsl #7
3506 //
3507 // The number of instructions remains the same, but ORR is faster than BFXIL
3508 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3509 // the dependency chain is improved after the transformation.
3510 uint64_t SrlImm;
3511 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3512 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3513 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3514 unsigned MaskWidth =
3515 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3516 unsigned UBFMOpc =
3517 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3518 SDNode *UBFMNode = CurDAG->getMachineNode(
3519 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3520 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3521 VT),
3522 CurDAG->getTargetConstant(
3523 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3524 ShiftedOperand = SDValue(UBFMNode, 0);
3525 EncodedShiftImm = AArch64_AM::getShifterImm(
3526 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3527 return true;
3528 }
3529 }
3530 return false;
3531 }
3532
3533 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3534 ShiftedOperand = Dst.getOperand(0);
3535 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3536 return true;
3537 }
3538
3539 uint64_t SrlImm;
3540 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3541 ShiftedOperand = Dst.getOperand(0);
3542 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3543 return true;
3544 }
3545 return false;
3546}
3547
3548// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3549// the operands and select it to AArch64::ORR with shifted registers if
3550// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3551static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3552 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3553 const bool BiggerPattern) {
3554 EVT VT = N->getValueType(0);
3555 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3556 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3557 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3558 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3559 assert((VT == MVT::i32 || VT == MVT::i64) &&
3560 "Expect result type to be i32 or i64 since N is combinable to BFM");
3561 SDLoc DL(N);
3562
3563 // Bail out if BFM simplifies away one node in BFM Dst.
3564 if (OrOpd1 != Dst)
3565 return false;
3566
3567 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3568 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3569 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3570 if (BiggerPattern) {
3571 uint64_t SrcAndImm;
3572 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3573 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3574 // OrOpd0 = AND Src, #Mask
3575 // So BFM simplifies away one AND node from Src and doesn't simplify away
3576 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3577 // one node (from Rd), ORR is better since it has higher throughput and
3578 // smaller latency than BFM on many AArch64 processors (and for the rest
3579 // ORR is at least as good as BFM).
3580 SDValue ShiftedOperand;
3581 uint64_t EncodedShiftImm;
3582 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3583 EncodedShiftImm)) {
3584 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3585 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3586 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3587 return true;
3588 }
3589 }
3590 return false;
3591 }
3592
3593 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3594
3595 uint64_t ShlImm;
3596 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3597 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3598 SDValue Ops[] = {
3599 Dst, Src,
3600 CurDAG->getTargetConstant(
3602 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3603 return true;
3604 }
3605
3606 // Select the following pattern to left-shifted operand rather than BFI.
3607 // %val1 = op ..
3608 // %val2 = shl %val1, #imm
3609 // %res = or %val1, %val2
3610 //
3611 // If N is selected to be BFI, we know that
3612 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3613 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3614 //
3615 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3616 if (OrOpd0.getOperand(0) == OrOpd1) {
3617 SDValue Ops[] = {
3618 OrOpd1, OrOpd1,
3619 CurDAG->getTargetConstant(
3621 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3622 return true;
3623 }
3624 }
3625
3626 uint64_t SrlImm;
3627 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3628 // Select the following pattern to right-shifted operand rather than BFXIL.
3629 // %val1 = op ..
3630 // %val2 = lshr %val1, #imm
3631 // %res = or %val1, %val2
3632 //
3633 // If N is selected to be BFXIL, we know that
3634 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3635 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3636 //
3637 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3638 if (OrOpd0.getOperand(0) == OrOpd1) {
3639 SDValue Ops[] = {
3640 OrOpd1, OrOpd1,
3641 CurDAG->getTargetConstant(
3643 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3644 return true;
3645 }
3646 }
3647
3648 return false;
3649}
3650
// Try to select an ISD::OR rooted at N as a bitfield-insert (BFM) or, as a
// fallback, a BFXIL built from two complementary AND masks. UsefulBits are
// the bits of the result actually demanded by N's users; they let us tolerate
// masks already narrowed by simplify-demanded-bits. Returns true iff N was
// replaced with machine nodes.
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given a OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  //                       isBitfieldExtractOp)
  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    // I/2 selects BiggerPattern, I%2 selects which operand plays Opd0.
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion we may
      // want to widen the pattern if we want to grab general bitfield
      // move case
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      // The shifted-into-place form: translate (DstLSB, Width) into the BFM
      // rotate/width immediates.
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows to catch more general case than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
    // with shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // both parts match
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
    }

    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = llvm::countr_zero(Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    // Fold an existing SRL of Src into the shift we are about to create.
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  return false;
}
3824
3825bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3826 if (N->getOpcode() != ISD::OR)
3827 return false;
3828
3829 APInt NUsefulBits;
3830 getUsefulBits(SDValue(N, 0), NUsefulBits);
3831
3832 // If all bits are not useful, just return UNDEF.
3833 if (!NUsefulBits) {
3834 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3835 return true;
3836 }
3837
3838 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3839 return true;
3840
3841 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3842}
3843
3844/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3845/// equivalent of a left shift by a constant amount followed by an and masking
3846/// out a contiguous set of bits.
3847bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3848 if (N->getOpcode() != ISD::AND)
3849 return false;
3850
3851 EVT VT = N->getValueType(0);
3852 if (VT != MVT::i32 && VT != MVT::i64)
3853 return false;
3854
3855 SDValue Op0;
3856 int DstLSB, Width;
3857 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3858 Op0, DstLSB, Width))
3859 return false;
3860
3861 // ImmR is the rotate right amount.
3862 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3863 // ImmS is the most significant bit of the source to be moved.
3864 unsigned ImmS = Width - 1;
3865
3866 SDLoc DL(N);
3867 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3868 CurDAG->getTargetConstant(ImmS, DL, VT)};
3869 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3870 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3871 return true;
3872}
3873
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions. The AArch64 variable shifts only use
/// the low log2(regsize) bits of the amount register, so ADD/SUB/AND nodes
/// that only adjust the amount modulo the register size can be simplified
/// or skipped entirely.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  // Pick the register-variable shift/rotate opcode matching the DAG node.
  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  // Size is the register width; Bits is the number of low bits of the shift
  // amount the instruction consumes (log2 of Size).
  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
      // to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
      // to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers the
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
    // the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
                                                NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
3998
4000 SDValue &FixedPos,
4001 unsigned RegWidth,
4002 bool isReciprocal) {
4003 APFloat FVal(0.0);
4005 FVal = CN->getValueAPF();
4006 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4007 // Some otherwise illegal constants are allowed in this case.
4008 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4009 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4010 return false;
4011
4012 ConstantPoolSDNode *CN =
4013 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4014 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4015 } else
4016 return false;
4017
4018 if (unsigned FBits =
4019 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4020 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4021 return true;
4022 }
4023
4024 return false;
4025}
4026
4027bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4028 unsigned RegWidth) {
4029 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4030 /*isReciprocal*/ false);
4031}
4032
// Match a vector fixed-point conversion scale operand. The splatted
// floating-point constant may have been materialised in several ways
// (MOVI-with-shift, FMOV immediate, or DUP of an integer constant), so the
// raw bit pattern is decoded back into an APFloat before being checked.
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
                                                 unsigned RegWidth) {
  // Look through casts that preserve the scalar element width.
  if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
      N.getValueType().getScalarSizeInBits() ==
          N.getOperand(0).getValueType().getScalarSizeInBits())
    N = N.getOperand(0);

  // Reinterpret a raw immediate bit pattern as a float of width RegWidth.
  auto ImmToFloat = [RegWidth](APInt Imm) {
    switch (RegWidth) {
    case 16:
      return APFloat(APFloat::IEEEhalf(), Imm);
    case 32:
      return APFloat(APFloat::IEEEsingle(), Imm);
    case 64:
      return APFloat(APFloat::IEEEdouble(), Imm);
    default:
      llvm_unreachable("Unexpected RegWidth!");
    };
  };

  APFloat FVal(0.0);
  switch (N->getOpcode()) {
  case AArch64ISD::MOVIshift:
    // Operands are (immediate, left-shift amount).
    FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
                                          << N.getConstantOperandVal(1)));
    break;
  case AArch64ISD::FMOV:
    assert(RegWidth == 32 || RegWidth == 64);
    if (RegWidth == 32)
      FVal = ImmToFloat(
          APInt(RegWidth, (uint32_t)AArch64_AM::decodeAdvSIMDModImmType11(
                              N.getConstantOperandVal(0))));
    else
      FVal = ImmToFloat(APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(
                                            N.getConstantOperandVal(0))));
    break;
  case AArch64ISD::DUP:
    // Only constant splats can be decoded.
    if (isa<ConstantSDNode>(N.getOperand(0)))
      FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
    else
      return false;
    break;
  default:
    return false;
  }

  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false)) {
    FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
4087
4088bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4089 SDValue &FixedPos,
4090 unsigned RegWidth) {
4091 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4092 /*isReciprocal*/ true);
4093}
4094
4095// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4096// of the string and obtains the integer values from them and combines these
4097// into a single value to be used in the MRS/MSR instruction.
4100 RegString.split(Fields, ':');
4101
4102 if (Fields.size() == 1)
4103 return -1;
4104
4105 assert(Fields.size() == 5
4106 && "Invalid number of fields in read register string");
4107
4109 bool AllIntFields = true;
4110
4111 for (StringRef Field : Fields) {
4112 unsigned IntField;
4113 AllIntFields &= !Field.getAsInteger(10, IntField);
4114 Ops.push_back(IntField);
4115 }
4116
4117 assert(AllIntFields &&
4118 "Unexpected non-integer value in special register string.");
4119 (void)AllIntFields;
4120
4121 // Need to combine the integer fields of the string into a single value
4122 // based on the bit encoding of MRS/MSR instruction.
4123 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4124 (Ops[3] << 3) | (Ops[4]);
4125}
4126
// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  // The register name is carried as metadata in operand 1.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  // MRRS reads a 128-bit register into a pair of X registers.
  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match, Use the sysreg mapper to map the remaining possible strings to
    // the value for the register to be used for the instruction operand.
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match, see if this is "pc" or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian. The even register always contains the low half
    // of the register.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  };
  return true;
}
4187
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  // The register name is carried as metadata in operand 1.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  // MSRR writes a 128-bit register from a pair of X registers.
  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument, we know that this is
    // the case as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(2);
        CurDAG->SelectNodeTo(
            N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
            CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
        return true;
      }
      return false;
    };

    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         N->getOperand(2), InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd supreg.
    SDNode *Pair = CurDAG->getMachineNode(
        TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
        {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   MVT::i32),
         N->getOperand(2),
         CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
         N->getOperand(3),
         CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});

    CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         SDValue(Pair, 0), InChain);
  }

  return true;
}
4269
4270/// We've got special pseudo-instructions for these
4271bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4272 unsigned Opcode;
4273 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4274
4275 // Leave IR for LSE if subtarget supports it.
4276 if (Subtarget->hasLSE()) return false;
4277
4278 if (MemTy == MVT::i8)
4279 Opcode = AArch64::CMP_SWAP_8;
4280 else if (MemTy == MVT::i16)
4281 Opcode = AArch64::CMP_SWAP_16;
4282 else if (MemTy == MVT::i32)
4283 Opcode = AArch64::CMP_SWAP_32;
4284 else if (MemTy == MVT::i64)
4285 Opcode = AArch64::CMP_SWAP_64;
4286 else
4287 llvm_unreachable("Unknown AtomicCmpSwap type");
4288
4289 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4290 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4291 N->getOperand(0)};
4292 SDNode *CmpSwap = CurDAG->getMachineNode(
4293 Opcode, SDLoc(N),
4294 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4295
4296 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4297 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4298
4299 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4300 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4301 CurDAG->RemoveDeadNode(N);
4302
4303 return true;
4304}
4305
// Match a constant usable as an immediate for the SVE add/sub (immediate)
// instructions: an unsigned 8-bit payload, optionally left-shifted by 8 for
// the wider element types. Negate matches the negated constant instead (so
// e.g. a subtract of C can be selected as an add of -C).
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
                                             SDValue &Shift, bool Negate) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  // Truncate to the element width so the range checks below see the value
  // the instruction will actually operate on.
  APInt Val =
      cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());

  if (Negate)
    Val = -Val;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8bit unsigned immediates.
    if ((Val & ~0xff) == 0) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
      return true;
    }
    // Support 16bit unsigned immediates that are a multiple of 256.
    if ((Val & ~0xff00) == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
4346
4347bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4348 SDValue &Imm, SDValue &Shift,
4349 bool Negate) {
4350 if (!isa<ConstantSDNode>(N))
4351 return false;
4352
4353 SDLoc DL(N);
4354 int64_t Val = cast<ConstantSDNode>(N)
4355 ->getAPIntValue()
4357 .getSExtValue();
4358
4359 if (Negate)
4360 Val = -Val;
4361
4362 // Signed saturating instructions treat their immediate operand as unsigned,
4363 // whereas the related intrinsics define their operands to be signed. This
4364 // means we can only use the immediate form when the operand is non-negative.
4365 if (Val < 0)
4366 return false;
4367
4368 switch (VT.SimpleTy) {
4369 case MVT::i8:
4370 // All positive immediates are supported.
4371 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4372 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4373 return true;
4374 case MVT::i16:
4375 case MVT::i32:
4376 case MVT::i64:
4377 // Support 8bit positive immediates.
4378 if (Val <= 255) {
4379 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4380 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4381 return true;
4382 }
4383 // Support 16bit positive immediates that are a multiple of 256.
4384 if (Val <= 65280 && Val % 256 == 0) {
4385 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4386 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4387 return true;
4388 }
4389 break;
4390 default:
4391 break;
4392 }
4393
4394 return false;
4395}
4396
4397bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4398 SDValue &Shift) {
4399 if (!isa<ConstantSDNode>(N))
4400 return false;
4401
4402 SDLoc DL(N);
4403 int64_t Val = cast<ConstantSDNode>(N)
4404 ->getAPIntValue()
4405 .trunc(VT.getFixedSizeInBits())
4406 .getSExtValue();
4407 int32_t ImmVal, ShiftVal;
4408 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4409 ShiftVal))
4410 return false;
4411
4412 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4413 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4414 return true;
4415}
4416
4417bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4418 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4419 int64_t ImmVal = CNode->getSExtValue();
4420 SDLoc DL(N);
4421 if (ImmVal >= -128 && ImmVal < 128) {
4422 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4423 return true;
4424 }
4425 }
4426 return false;
4427}
4428
4429bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4430 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4431 uint64_t ImmVal = CNode->getZExtValue();
4432
4433 switch (VT.SimpleTy) {
4434 case MVT::i8:
4435 ImmVal &= 0xFF;
4436 break;
4437 case MVT::i16:
4438 ImmVal &= 0xFFFF;
4439 break;
4440 case MVT::i32:
4441 ImmVal &= 0xFFFFFFFF;
4442 break;
4443 case MVT::i64:
4444 break;
4445 default:
4446 llvm_unreachable("Unexpected type");
4447 }
4448
4449 if (ImmVal < 256) {
4450 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4451 return true;
4452 }
4453 }
4454 return false;
4455}
4456
4457bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4458 bool Invert) {
4459 uint64_t ImmVal;
4460 if (auto CI = dyn_cast<ConstantSDNode>(N))
4461 ImmVal = CI->getZExtValue();
4462 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4463 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4464 else
4465 return false;
4466
4467 if (Invert)
4468 ImmVal = ~ImmVal;
4469
4470 uint64_t encoding;
4471 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4472 return false;
4473
4474 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4475 return true;
4476}
4477
4478// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4479// Rather than attempt to normalise everything we can sometimes saturate the
4480// shift amount during selection. This function also allows for consistent
4481// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4482// required by the instructions.
4483bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4484 uint64_t High, bool AllowSaturation,
4485 SDValue &Imm) {
4486 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4487 uint64_t ImmVal = CN->getZExtValue();
4488
4489 // Reject shift amounts that are too small.
4490 if (ImmVal < Low)
4491 return false;
4492
4493 // Reject or saturate shift amounts that are too big.
4494 if (ImmVal > High) {
4495 if (!AllowSaturation)
4496 return false;
4497 ImmVal = High;
4498 }
4499
4500 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4501 return true;
4502 }
4503
4504 return false;
4505}
4506
4507bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4508 // tagp(FrameIndex, IRGstack, tag_offset):
4509 // since the offset between FrameIndex and IRGstack is a compile-time
4510 // constant, this can be lowered to a single ADDG instruction.
4511 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4512 return false;
4513 }
4514
4515 SDValue IRG_SP = N->getOperand(2);
4516 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4517 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4518 return false;
4519 }
4520
4521 const TargetLowering *TLI = getTargetLowering();
4522 SDLoc DL(N);
4523 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4524 SDValue FiOp = CurDAG->getTargetFrameIndex(
4525 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4526 int TagOffset = N->getConstantOperandVal(3);
4527
4528 SDNode *Out = CurDAG->getMachineNode(
4529 AArch64::TAGPstack, DL, MVT::i64,
4530 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4531 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4532 ReplaceNode(N, Out);
4533 return true;
4534}
4535
4536void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4537 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4538 "llvm.aarch64.tagp third argument must be an immediate");
4539 if (trySelectStackSlotTagP(N))
4540 return;
4541 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4542 // compile-time constant, not just for stack allocations.
4543
4544 // General case for unrelated pointers in Op1 and Op2.
4545 SDLoc DL(N);
4546 int TagOffset = N->getConstantOperandVal(3);
4547 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4548 {N->getOperand(1), N->getOperand(2)});
4549 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4550 {SDValue(N1, 0), N->getOperand(2)});
4551 SDNode *N3 = CurDAG->getMachineNode(
4552 AArch64::ADDG, DL, MVT::i64,
4553 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4554 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4555 ReplaceNode(N, N3);
4556}
4557
4558bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4559 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4560
4561 // Bail when not a "cast" like insert_subvector.
4562 if (N->getConstantOperandVal(2) != 0)
4563 return false;
4564 if (!N->getOperand(0).isUndef())
4565 return false;
4566
4567 // Bail when normal isel should do the job.
4568 EVT VT = N->getValueType(0);
4569 EVT InVT = N->getOperand(1).getValueType();
4570 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4571 return false;
4572 if (InVT.getSizeInBits() <= 128)
4573 return false;
4574
4575 // NOTE: We can only get here when doing fixed length SVE code generation.
4576 // We do manual selection because the types involved are not linked to real
4577 // registers (despite being legal) and must be coerced into SVE registers.
4578
4580 "Expected to insert into a packed scalable vector!");
4581
4582 SDLoc DL(N);
4583 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4584 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4585 N->getOperand(1), RC));
4586 return true;
4587}
4588
4589bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4590 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4591
4592 // Bail when not a "cast" like extract_subvector.
4593 if (N->getConstantOperandVal(1) != 0)
4594 return false;
4595
4596 // Bail when normal isel can do the job.
4597 EVT VT = N->getValueType(0);
4598 EVT InVT = N->getOperand(0).getValueType();
4599 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4600 return false;
4601 if (VT.getSizeInBits() <= 128)
4602 return false;
4603
4604 // NOTE: We can only get here when doing fixed length SVE code generation.
4605 // We do manual selection because the types involved are not linked to real
4606 // registers (despite being legal) and must be coerced into SVE registers.
4607
4609 "Expected to extract from a packed scalable vector!");
4610
4611 SDLoc DL(N);
4612 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4613 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4614 N->getOperand(0), RC));
4615 return true;
4616}
4617
4618bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4619 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4620
4621 SDValue N0 = N->getOperand(0);
4622 SDValue N1 = N->getOperand(1);
4623
4624 EVT VT = N->getValueType(0);
4625 SDLoc DL(N);
4626
4627 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4628 // Rotate by a constant is a funnel shift in IR which is exanded to
4629 // an OR with shifted operands.
4630 // We do the following transform:
4631 // OR N0, N1 -> xar (x, y, imm)
4632 // Where:
4633 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4634 // N0 = SHL_PRED true, V, splat(bits-imm)
4635 // V = (xor x, y)
4636 if (VT.isScalableVector() &&
4637 (Subtarget->hasSVE2() ||
4638 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4639 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4640 N1.getOpcode() != AArch64ISD::SRL_PRED)
4641 std::swap(N0, N1);
4642 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4643 N1.getOpcode() != AArch64ISD::SRL_PRED)
4644 return false;
4645
4646 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4647 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4648 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4649 return false;
4650
4651 if (N0.getOperand(1) != N1.getOperand(1))
4652 return false;
4653
4654 SDValue R1, R2;
4655 bool IsXOROperand = true;
4656 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4657 IsXOROperand = false;
4658 } else {
4659 R1 = N0.getOperand(1).getOperand(0);
4660 R2 = N1.getOperand(1).getOperand(1);
4661 }
4662
4663 APInt ShlAmt, ShrAmt;
4664 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4666 return false;
4667
4668 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4669 return false;
4670
4671 if (!IsXOROperand) {
4672 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4673 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4674 SDValue MOVIV = SDValue(MOV, 0);
4675
4676 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4677 SDNode *SubRegToReg =
4678 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4679
4680 R1 = N1->getOperand(1);
4681 R2 = SDValue(SubRegToReg, 0);
4682 }
4683
4684 SDValue Imm =
4685 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4686
4687 SDValue Ops[] = {R1, R2, Imm};
4689 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4690 AArch64::XAR_ZZZI_D})) {
4691 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4692 return true;
4693 }
4694 return false;
4695 }
4696
4697 // We have Neon SHA3 XAR operation for v2i64 but for types
4698 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4699 // is available.
4700 EVT SVT;
4701 switch (VT.getSimpleVT().SimpleTy) {
4702 case MVT::v4i32:
4703 case MVT::v2i32:
4704 SVT = MVT::nxv4i32;
4705 break;
4706 case MVT::v8i16:
4707 case MVT::v4i16:
4708 SVT = MVT::nxv8i16;
4709 break;
4710 case MVT::v16i8:
4711 case MVT::v8i8:
4712 SVT = MVT::nxv16i8;
4713 break;
4714 case MVT::v2i64:
4715 case MVT::v1i64:
4716 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4717 break;
4718 default:
4719 return false;
4720 }
4721
4722 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4723 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4724 return false;
4725
4726 if (N0->getOpcode() != AArch64ISD::VSHL ||
4727 N1->getOpcode() != AArch64ISD::VLSHR)
4728 return false;
4729
4730 if (N0->getOperand(0) != N1->getOperand(0))
4731 return false;
4732
4733 SDValue R1, R2;
4734 bool IsXOROperand = true;
4735 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4736 IsXOROperand = false;
4737 } else {
4738 SDValue XOR = N0.getOperand(0);
4739 R1 = XOR.getOperand(0);
4740 R2 = XOR.getOperand(1);
4741 }
4742
4743 unsigned HsAmt = N0.getConstantOperandVal(1);
4744 unsigned ShAmt = N1.getConstantOperandVal(1);
4745
4746 SDValue Imm = CurDAG->getTargetConstant(
4747 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4748
4749 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4750 if (ShAmt + HsAmt != VTSizeInBits)
4751 return false;
4752
4753 if (!IsXOROperand) {
4754 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4755 SDNode *MOV =
4756 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4757 SDValue MOVIV = SDValue(MOV, 0);
4758
4759 R1 = N1->getOperand(0);
4760 R2 = MOVIV;
4761 }
4762
4763 if (SVT != VT) {
4764 SDValue Undef =
4765 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4766
4767 if (SVT.isScalableVector() && VT.is64BitVector()) {
4768 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4769
4770 SDValue UndefQ = SDValue(
4771 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4772 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4773
4774 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4775 UndefQ, R1, DSub),
4776 0);
4777 if (R2.getValueType() == VT)
4778 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4779 UndefQ, R2, DSub),
4780 0);
4781 }
4782
4783 SDValue SubReg = CurDAG->getTargetConstant(
4784 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4785
4786 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4787 R1, SubReg),
4788 0);
4789
4790 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4791 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4792 Undef, R2, SubReg),
4793 0);
4794 }
4795
4796 SDValue Ops[] = {R1, R2, Imm};
4797 SDNode *XAR = nullptr;
4798
4799 if (SVT.isScalableVector()) {
4801 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4802 AArch64::XAR_ZZZI_D}))
4803 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4804 } else {
4805 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4806 }
4807
4808 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4809
4810 if (SVT != VT) {
4811 if (VT.is64BitVector() && SVT.isScalableVector()) {
4812 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4813
4814 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4815 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4816 SDValue(XAR, 0), ZSub);
4817
4818 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4819 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4820 SDValue(Q, 0), DSub);
4821 } else {
4822 SDValue SubReg = CurDAG->getTargetConstant(
4823 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4824 MVT::i32);
4825 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4826 SDValue(XAR, 0), SubReg);
4827 }
4828 }
4829 ReplaceNode(N, XAR);
4830 return true;
4831}
4832
4833void AArch64DAGToDAGISel::Select(SDNode *Node) {
4834 // If we have a custom node, we already have selected!
4835 if (Node->isMachineOpcode()) {
4836 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4837 Node->setNodeId(-1);
4838 return;
4839 }
4840
4841 // Few custom selection stuff.
4842 EVT VT = Node->getValueType(0);
4843
4844 switch (Node->getOpcode()) {
4845 default:
4846 break;
4847
4849 if (SelectCMP_SWAP(Node))
4850 return;
4851 break;
4852
4853 case ISD::READ_REGISTER:
4854 case AArch64ISD::MRRS:
4855 if (tryReadRegister(Node))
4856 return;
4857 break;
4858
4860 case AArch64ISD::MSRR:
4861 if (tryWriteRegister(Node))
4862 return;
4863 break;
4864
4865 case ISD::LOAD: {
4866 // Try to select as an indexed load. Fall through to normal processing
4867 // if we can't.
4868 if (tryIndexedLoad(Node))
4869 return;
4870 break;
4871 }
4872
4873 case ISD::SRL:
4874 case ISD::AND:
4875 case ISD::SRA:
4877 if (tryBitfieldExtractOp(Node))
4878 return;
4879 if (tryBitfieldInsertInZeroOp(Node))
4880 return;
4881 [[fallthrough]];
4882 case ISD::ROTR:
4883 case ISD::SHL:
4884 if (tryShiftAmountMod(Node))
4885 return;
4886 break;
4887
4888 case ISD::SIGN_EXTEND:
4889 if (tryBitfieldExtractOpFromSExt(Node))
4890 return;
4891 break;
4892
4893 case ISD::OR:
4894 if (tryBitfieldInsertOp(Node))
4895 return;
4896 if (trySelectXAR(Node))
4897 return;
4898 break;
4899
4901 if (trySelectCastScalableToFixedLengthVector(Node))
4902 return;
4903 break;
4904 }
4905
4906 case ISD::INSERT_SUBVECTOR: {
4907 if (trySelectCastFixedLengthToScalableVector(Node))
4908 return;
4909 break;
4910 }
4911
4912 case ISD::Constant: {
4913 // Materialize zero constants as copies from WZR/XZR. This allows
4914 // the coalescer to propagate these into other instructions.
4915 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4916 if (ConstNode->isZero()) {
4917 if (VT == MVT::i32) {
4918 SDValue New = CurDAG->getCopyFromReg(
4919 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4920 ReplaceNode(Node, New.getNode());
4921 return;
4922 } else if (VT == MVT::i64) {
4923 SDValue New = CurDAG->getCopyFromReg(
4924 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4925 ReplaceNode(Node, New.getNode());
4926 return;
4927 }
4928 }
4929 break;
4930 }
4931
4932 case ISD::FrameIndex: {
4933 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4934 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4935 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4936 const TargetLowering *TLI = getTargetLowering();
4937 SDValue TFI = CurDAG->getTargetFrameIndex(
4938 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4939 SDLoc DL(Node);
4940 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4941 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4942 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4943 return;
4944 }
4946 unsigned IntNo = Node->getConstantOperandVal(1);
4947 switch (IntNo) {
4948 default:
4949 break;
4950 case Intrinsic::aarch64_gcsss: {
4951 SDLoc DL(Node);
4952 SDValue Chain = Node->getOperand(0);
4953 SDValue Val = Node->getOperand(2);
4954 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4955 SDNode *SS1 =
4956 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4957 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4958 MVT::Other, Zero, SDValue(SS1, 0));
4959 ReplaceNode(Node, SS2);
4960 return;
4961 }
4962 case Intrinsic::aarch64_ldaxp:
4963 case Intrinsic::aarch64_ldxp: {
4964 unsigned Op =
4965 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4966 SDValue MemAddr = Node->getOperand(2);
4967 SDLoc DL(Node);
4968 SDValue Chain = Node->getOperand(0);
4969
4970 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4971 MVT::Other, MemAddr, Chain);
4972
4973 // Transfer memoperands.
4974 MachineMemOperand *MemOp =
4975 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4976 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4977 ReplaceNode(Node, Ld);
4978 return;
4979 }
4980 case Intrinsic::aarch64_stlxp:
4981 case Intrinsic::aarch64_stxp: {
4982 unsigned Op =
4983 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4984 SDLoc DL(Node);
4985 SDValue Chain = Node->getOperand(0);
4986 SDValue ValLo = Node->getOperand(2);
4987 SDValue ValHi = Node->getOperand(3);
4988 SDValue MemAddr = Node->getOperand(4);
4989
4990 // Place arguments in the right order.
4991 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4992
4993 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4994 // Transfer memoperands.
4995 MachineMemOperand *MemOp =
4996 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4997 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4998
4999 ReplaceNode(Node, St);
5000 return;
5001 }
5002 case Intrinsic::aarch64_neon_ld1x2:
5003 if (VT == MVT::v8i8) {
5004 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5005 return;
5006 } else if (VT == MVT::v16i8) {
5007 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5008 return;
5009 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5010 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5011 return;
5012 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5013 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5014 return;
5015 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5016 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5017 return;
5018 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5019 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5020 return;
5021 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5022 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5023 return;
5024 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5025 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5026 return;
5027 }
5028 break;
5029 case Intrinsic::aarch64_neon_ld1x3:
5030 if (VT == MVT::v8i8) {
5031 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5032 return;
5033 } else if (VT == MVT::v16i8) {
5034 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5035 return;
5036 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5037 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5038 return;
5039 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5040 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5041 return;
5042 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5043 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5044 return;
5045 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5046 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5047 return;
5048 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5049 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5050 return;
5051 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5052 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5053 return;
5054 }
5055 break;
5056 case Intrinsic::aarch64_neon_ld1x4:
5057 if (VT == MVT::v8i8) {
5058 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5059 return;
5060 } else if (VT == MVT::v16i8) {
5061 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5062 return;
5063 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5064 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5065 return;
5066 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5067 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5068 return;
5069 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5070 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5071 return;
5072 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5073 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5074 return;
5075 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5076 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5077 return;
5078 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5079 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5080 return;
5081 }
5082 break;
5083 case Intrinsic::aarch64_neon_ld2:
5084 if (VT == MVT::v8i8) {
5085 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v16i8) {
5088 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5089 return;
5090 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5091 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5092 return;
5093 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5094 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5095 return;
5096 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5097 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5098 return;
5099 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5100 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5101 return;
5102 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5103 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5104 return;
5105 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5106 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5107 return;
5108 }
5109 break;
5110 case Intrinsic::aarch64_neon_ld3:
5111 if (VT == MVT::v8i8) {
5112 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5113 return;
5114 } else if (VT == MVT::v16i8) {
5115 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5116 return;
5117 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5118 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5119 return;
5120 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5121 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5122 return;
5123 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5124 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5125 return;
5126 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5127 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5128 return;
5129 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5130 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5131 return;
5132 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5133 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5134 return;
5135 }
5136 break;
5137 case Intrinsic::aarch64_neon_ld4:
5138 if (VT == MVT::v8i8) {
5139 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5140 return;
5141 } else if (VT == MVT::v16i8) {
5142 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5143 return;
5144 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5145 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5146 return;
5147 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5148 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5149 return;
5150 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5151 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5152 return;
5153 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5154 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5155 return;
5156 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5157 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5158 return;
5159 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5160 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5161 return;
5162 }
5163 break;
5164 case Intrinsic::aarch64_neon_ld2r:
5165 if (VT == MVT::v8i8) {
5166 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5167 return;
5168 } else if (VT == MVT::v16i8) {
5169 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5170 return;
5171 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5172 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5173 return;
5174 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5175 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5176 return;
5177 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5178 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5179 return;
5180 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5181 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5182 return;
5183 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5184 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5185 return;
5186 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5187 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5188 return;
5189 }
5190 break;
5191 case Intrinsic::aarch64_neon_ld3r:
5192 if (VT == MVT::v8i8) {
5193 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5194 return;
5195 } else if (VT == MVT::v16i8) {
5196 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5197 return;
5198 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5199 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5200 return;
5201 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5202 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5203 return;
5204 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5205 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5206 return;
5207 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5208 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5209 return;
5210 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5211 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5212 return;
5213 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5214 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5215 return;
5216 }
5217 break;
5218 case Intrinsic::aarch64_neon_ld4r:
5219 if (VT == MVT::v8i8) {
5220 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5221 return;
5222 } else if (VT == MVT::v16i8) {
5223 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5224 return;
5225 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5226 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5227 return;
5228 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5229 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5230 return;
5231 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5232 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5233 return;
5234 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5235 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5236 return;
5237 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5238 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5239 return;
5240 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5241 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5242 return;
5243 }
5244 break;
5245 case Intrinsic::aarch64_neon_ld2lane:
5246 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5247 SelectLoadLane(Node, 2, AArch64::LD2i8);
5248 return;
5249 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5250 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5251 SelectLoadLane(Node, 2, AArch64::LD2i16);
5252 return;
5253 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5254 VT == MVT::v2f32) {
5255 SelectLoadLane(Node, 2, AArch64::LD2i32);
5256 return;
5257 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5258 VT == MVT::v1f64) {
5259 SelectLoadLane(Node, 2, AArch64::LD2i64);
5260 return;
5261 }
5262 break;
5263 case Intrinsic::aarch64_neon_ld3lane:
5264 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5265 SelectLoadLane(Node, 3, AArch64::LD3i8);
5266 return;
5267 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5268 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5269 SelectLoadLane(Node, 3, AArch64::LD3i16);
5270 return;
5271 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5272 VT == MVT::v2f32) {
5273 SelectLoadLane(Node, 3, AArch64::LD3i32);
5274 return;
5275 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5276 VT == MVT::v1f64) {
5277 SelectLoadLane(Node, 3, AArch64::LD3i64);
5278 return;
5279 }
5280 break;
5281 case Intrinsic::aarch64_neon_ld4lane:
5282 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5283 SelectLoadLane(Node, 4, AArch64::LD4i8);
5284 return;
5285 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5286 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5287 SelectLoadLane(Node, 4, AArch64::LD4i16);
5288 return;
5289 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5290 VT == MVT::v2f32) {
5291 SelectLoadLane(Node, 4, AArch64::LD4i32);
5292 return;
5293 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5294 VT == MVT::v1f64) {
5295 SelectLoadLane(Node, 4, AArch64::LD4i64);
5296 return;
5297 }
5298 break;
5299 case Intrinsic::aarch64_ld64b:
5300 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5301 return;
5302 case Intrinsic::aarch64_sve_ld2q_sret: {
5303 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5304 return;
5305 }
5306 case Intrinsic::aarch64_sve_ld3q_sret: {
5307 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5308 return;
5309 }
5310 case Intrinsic::aarch64_sve_ld4q_sret: {
5311 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5312 return;
5313 }
5314 case Intrinsic::aarch64_sve_ld2_sret: {
5315 if (VT == MVT::nxv16i8) {
5316 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5317 true);
5318 return;
5319 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5320 VT == MVT::nxv8bf16) {
5321 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5322 true);
5323 return;
5324 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5325 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5326 true);
5327 return;
5328 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5329 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5330 true);
5331 return;
5332 }
5333 break;
5334 }
5335 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5336 if (VT == MVT::nxv16i8) {
5337 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5338 SelectContiguousMultiVectorLoad(
5339 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5340 else if (Subtarget->hasSVE2p1())
5341 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5342 AArch64::LD1B_2Z);
5343 else
5344 break;
5345 return;
5346 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5347 VT == MVT::nxv8bf16) {
5348 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5349 SelectContiguousMultiVectorLoad(
5350 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5351 else if (Subtarget->hasSVE2p1())
5352 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5353 AArch64::LD1H_2Z);
5354 else
5355 break;
5356 return;
5357 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5358 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5359 SelectContiguousMultiVectorLoad(
5360 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5361 else if (Subtarget->hasSVE2p1())
5362 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5363 AArch64::LD1W_2Z);
5364 else
5365 break;
5366 return;
5367 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5368 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5369 SelectContiguousMultiVectorLoad(
5370 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5371 else if (Subtarget->hasSVE2p1())
5372 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5373 AArch64::LD1D_2Z);
5374 else
5375 break;
5376 return;
5377 }
5378 break;
5379 }
5380 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5381 if (VT == MVT::nxv16i8) {
5382 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5383 SelectContiguousMultiVectorLoad(
5384 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5385 else if (Subtarget->hasSVE2p1())
5386 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5387 AArch64::LD1B_4Z);
5388 else
5389 break;
5390 return;
5391 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5392 VT == MVT::nxv8bf16) {
5393 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5394 SelectContiguousMultiVectorLoad(
5395 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5396 else if (Subtarget->hasSVE2p1())
5397 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5398 AArch64::LD1H_4Z);
5399 else
5400 break;
5401 return;
5402 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5403 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5404 SelectContiguousMultiVectorLoad(
5405 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5406 else if (Subtarget->hasSVE2p1())
5407 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5408 AArch64::LD1W_4Z);
5409 else
5410 break;
5411 return;
5412 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5413 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5414 SelectContiguousMultiVectorLoad(
5415 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5416 else if (Subtarget->hasSVE2p1())
5417 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5418 AArch64::LD1D_4Z);
5419 else
5420 break;
5421 return;
5422 }
5423 break;
5424 }
5425 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5426 if (VT == MVT::nxv16i8) {
5427 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5428 SelectContiguousMultiVectorLoad(Node, 2, 0,
5429 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5430 AArch64::LDNT1B_2Z_PSEUDO);
5431 else if (Subtarget->hasSVE2p1())
5432 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5433 AArch64::LDNT1B_2Z);
5434 else
5435 break;
5436 return;
5437 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5438 VT == MVT::nxv8bf16) {
5439 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5440 SelectContiguousMultiVectorLoad(Node, 2, 1,
5441 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5442 AArch64::LDNT1H_2Z_PSEUDO);
5443 else if (Subtarget->hasSVE2p1())
5444 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5445 AArch64::LDNT1H_2Z);
5446 else
5447 break;
5448 return;
5449 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5450 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5451 SelectContiguousMultiVectorLoad(Node, 2, 2,
5452 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5453 AArch64::LDNT1W_2Z_PSEUDO);
5454 else if (Subtarget->hasSVE2p1())
5455 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5456 AArch64::LDNT1W_2Z);
5457 else
5458 break;
5459 return;
5460 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5461 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5462 SelectContiguousMultiVectorLoad(Node, 2, 3,
5463 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5464 AArch64::LDNT1D_2Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5467 AArch64::LDNT1D_2Z);
5468 else
5469 break;
5470 return;
5471 }
5472 break;
5473 }
5474 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5475 if (VT == MVT::nxv16i8) {
5476 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5477 SelectContiguousMultiVectorLoad(Node, 4, 0,
5478 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5479 AArch64::LDNT1B_4Z_PSEUDO);
5480 else if (Subtarget->hasSVE2p1())
5481 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5482 AArch64::LDNT1B_4Z);
5483 else
5484 break;
5485 return;
5486 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5487 VT == MVT::nxv8bf16) {
5488 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5489 SelectContiguousMultiVectorLoad(Node, 4, 1,
5490 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5491 AArch64::LDNT1H_4Z_PSEUDO);
5492 else if (Subtarget->hasSVE2p1())
5493 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5494 AArch64::LDNT1H_4Z);
5495 else
5496 break;
5497 return;
5498 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5499 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5500 SelectContiguousMultiVectorLoad(Node, 4, 2,
5501 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5502 AArch64::LDNT1W_4Z_PSEUDO);
5503 else if (Subtarget->hasSVE2p1())
5504 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5505 AArch64::LDNT1W_4Z);
5506 else
5507 break;
5508 return;
5509 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5510 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5511 SelectContiguousMultiVectorLoad(Node, 4, 3,
5512 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5513 AArch64::LDNT1D_4Z_PSEUDO);
5514 else if (Subtarget->hasSVE2p1())
5515 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5516 AArch64::LDNT1D_4Z);
5517 else
5518 break;
5519 return;
5520 }
5521 break;
5522 }
5523 case Intrinsic::aarch64_sve_ld3_sret: {
5524 if (VT == MVT::nxv16i8) {
5525 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5526 true);
5527 return;
5528 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5529 VT == MVT::nxv8bf16) {
5530 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5531 true);
5532 return;
5533 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5534 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5535 true);
5536 return;
5537 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5538 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5539 true);
5540 return;
5541 }
5542 break;
5543 }
5544 case Intrinsic::aarch64_sve_ld4_sret: {
5545 if (VT == MVT::nxv16i8) {
5546 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5547 true);
5548 return;
5549 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5550 VT == MVT::nxv8bf16) {
5551 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5552 true);
5553 return;
5554 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5555 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5556 true);
5557 return;
5558 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5559 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5560 true);
5561 return;
5562 }
5563 break;
5564 }
5565 case Intrinsic::aarch64_sme_read_hor_vg2: {
5566 if (VT == MVT::nxv16i8) {
5567 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5568 AArch64::MOVA_2ZMXI_H_B);
5569 return;
5570 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5571 VT == MVT::nxv8bf16) {
5572 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5573 AArch64::MOVA_2ZMXI_H_H);
5574 return;
5575 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5576 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5577 AArch64::MOVA_2ZMXI_H_S);
5578 return;
5579 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5580 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5581 AArch64::MOVA_2ZMXI_H_D);
5582 return;
5583 }
5584 break;
5585 }
5586 case Intrinsic::aarch64_sme_read_ver_vg2: {
5587 if (VT == MVT::nxv16i8) {
5588 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5589 AArch64::MOVA_2ZMXI_V_B);
5590 return;
5591 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5592 VT == MVT::nxv8bf16) {
5593 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5594 AArch64::MOVA_2ZMXI_V_H);
5595 return;
5596 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5597 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5598 AArch64::MOVA_2ZMXI_V_S);
5599 return;
5600 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5601 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5602 AArch64::MOVA_2ZMXI_V_D);
5603 return;
5604 }
5605 break;
5606 }
5607 case Intrinsic::aarch64_sme_read_hor_vg4: {
5608 if (VT == MVT::nxv16i8) {
5609 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5610 AArch64::MOVA_4ZMXI_H_B);
5611 return;
5612 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5613 VT == MVT::nxv8bf16) {
5614 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5615 AArch64::MOVA_4ZMXI_H_H);
5616 return;
5617 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5618 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5619 AArch64::MOVA_4ZMXI_H_S);
5620 return;
5621 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5622 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5623 AArch64::MOVA_4ZMXI_H_D);
5624 return;
5625 }
5626 break;
5627 }
5628 case Intrinsic::aarch64_sme_read_ver_vg4: {
5629 if (VT == MVT::nxv16i8) {
5630 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5631 AArch64::MOVA_4ZMXI_V_B);
5632 return;
5633 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5634 VT == MVT::nxv8bf16) {
5635 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5636 AArch64::MOVA_4ZMXI_V_H);
5637 return;
5638 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5639 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5640 AArch64::MOVA_4ZMXI_V_S);
5641 return;
5642 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5643 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5644 AArch64::MOVA_4ZMXI_V_D);
5645 return;
5646 }
5647 break;
5648 }
5649 case Intrinsic::aarch64_sme_read_vg1x2: {
5650 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5651 AArch64::MOVA_VG2_2ZMXI);
5652 return;
5653 }
5654 case Intrinsic::aarch64_sme_read_vg1x4: {
5655 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5656 AArch64::MOVA_VG4_4ZMXI);
5657 return;
5658 }
5659 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5660 if (VT == MVT::nxv16i8) {
5661 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5662 return;
5663 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5664 VT == MVT::nxv8bf16) {
5665 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5666 return;
5667 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5668 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5669 return;
5670 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5671 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5672 return;
5673 }
5674 break;
5675 }
5676 case Intrinsic::aarch64_sme_readz_vert_x2: {
5677 if (VT == MVT::nxv16i8) {
5678 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5679 return;
5680 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5681 VT == MVT::nxv8bf16) {
5682 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5683 return;
5684 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5685 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5686 return;
5687 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5688 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5689 return;
5690 }
5691 break;
5692 }
5693 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5694 if (VT == MVT::nxv16i8) {
5695 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5696 return;
5697 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5698 VT == MVT::nxv8bf16) {
5699 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5700 return;
5701 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5702 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5703 return;
5704 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5705 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5706 return;
5707 }
5708 break;
5709 }
5710 case Intrinsic::aarch64_sme_readz_vert_x4: {
5711 if (VT == MVT::nxv16i8) {
5712 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5713 return;
5714 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5715 VT == MVT::nxv8bf16) {
5716 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5717 return;
5718 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5719 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5720 return;
5721 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5722 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5723 return;
5724 }
5725 break;
5726 }
5727 case Intrinsic::aarch64_sme_readz_x2: {
5728 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5729 AArch64::ZA);
5730 return;
5731 }
5732 case Intrinsic::aarch64_sme_readz_x4: {
5733 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5734 AArch64::ZA);
5735 return;
5736 }
5737 case Intrinsic::swift_async_context_addr: {
5738 SDLoc DL(Node);
5739 SDValue Chain = Node->getOperand(0);
5740 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5741 SDValue Res = SDValue(
5742 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5743 CurDAG->getTargetConstant(8, DL, MVT::i32),
5744 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5745 0);
5746 ReplaceUses(SDValue(Node, 0), Res);
5747 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5748 CurDAG->RemoveDeadNode(Node);
5749
5750 auto &MF = CurDAG->getMachineFunction();
5751 MF.getFrameInfo().setFrameAddressIsTaken(true);
5752 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5753 return;
5754 }
5755 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5757 Node->getValueType(0),
5758 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5759 AArch64::LUTI2_4ZTZI_S}))
5760 // Second Immediate must be <= 3:
5761 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5762 return;
5763 }
5764 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5766 Node->getValueType(0),
5767 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5768 // Second Immediate must be <= 1:
5769 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5770 return;
5771 }
5772 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5774 Node->getValueType(0),
5775 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5776 AArch64::LUTI2_2ZTZI_S}))
5777 // Second Immediate must be <= 7:
5778 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5779 return;
5780 }
5781 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5783 Node->getValueType(0),
5784 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5785 AArch64::LUTI4_2ZTZI_S}))
5786 // Second Immediate must be <= 3:
5787 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5788 return;
5789 }
5790 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5791 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5792 return;
5793 }
5794 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5796 Node->getValueType(0),
5797 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5798 SelectCVTIntrinsicFP8(Node, 2, Opc);
5799 return;
5800 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5802 Node->getValueType(0),
5803 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5804 SelectCVTIntrinsicFP8(Node, 2, Opc);
5805 return;
5806 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5808 Node->getValueType(0),
5809 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5810 SelectCVTIntrinsicFP8(Node, 2, Opc);
5811 return;
5812 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5814 Node->getValueType(0),
5815 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5816 SelectCVTIntrinsicFP8(Node, 2, Opc);
5817 return;
5818 case Intrinsic::ptrauth_resign_load_relative:
5819 SelectPtrauthResign(Node);
5820 return;
5821 }
5822 } break;
5824 unsigned IntNo = Node->getConstantOperandVal(0);
5825 switch (IntNo) {
5826 default:
5827 break;
5828 case Intrinsic::aarch64_tagp:
5829 SelectTagP(Node);
5830 return;
5831
5832 case Intrinsic::ptrauth_auth:
5833 SelectPtrauthAuth(Node);
5834 return;
5835
5836 case Intrinsic::ptrauth_resign:
5837 SelectPtrauthResign(Node);
5838 return;
5839
5840 case Intrinsic::aarch64_neon_tbl2:
5841 SelectTable(Node, 2,
5842 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5843 false);
5844 return;
5845 case Intrinsic::aarch64_neon_tbl3:
5846 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5847 : AArch64::TBLv16i8Three,
5848 false);
5849 return;
5850 case Intrinsic::aarch64_neon_tbl4:
5851 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5852 : AArch64::TBLv16i8Four,
5853 false);
5854 return;
5855 case Intrinsic::aarch64_neon_tbx2:
5856 SelectTable(Node, 2,
5857 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5858 true);
5859 return;
5860 case Intrinsic::aarch64_neon_tbx3:
5861 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5862 : AArch64::TBXv16i8Three,
5863 true);
5864 return;
5865 case Intrinsic::aarch64_neon_tbx4:
5866 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5867 : AArch64::TBXv16i8Four,
5868 true);
5869 return;
5870 case Intrinsic::aarch64_sve_srshl_single_x2:
5872 Node->getValueType(0),
5873 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5874 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5875 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5876 return;
5877 case Intrinsic::aarch64_sve_srshl_single_x4:
5879 Node->getValueType(0),
5880 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5881 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5882 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5883 return;
5884 case Intrinsic::aarch64_sve_urshl_single_x2:
5886 Node->getValueType(0),
5887 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5888 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5889 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5890 return;
5891 case Intrinsic::aarch64_sve_urshl_single_x4:
5893 Node->getValueType(0),
5894 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5895 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5896 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5897 return;
5898 case Intrinsic::aarch64_sve_srshl_x2:
5900 Node->getValueType(0),
5901 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5902 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5903 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5904 return;
5905 case Intrinsic::aarch64_sve_srshl_x4:
5907 Node->getValueType(0),
5908 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5909 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5910 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5911 return;
5912 case Intrinsic::aarch64_sve_urshl_x2:
5914 Node->getValueType(0),
5915 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5916 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5917 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5918 return;
5919 case Intrinsic::aarch64_sve_urshl_x4:
5921 Node->getValueType(0),
5922 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5923 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5924 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5925 return;
5926 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5928 Node->getValueType(0),
5929 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5930 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5931 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5932 return;
5933 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5935 Node->getValueType(0),
5936 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5937 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5938 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5939 return;
5940 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5942 Node->getValueType(0),
5943 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5944 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5945 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5946 return;
5947 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5949 Node->getValueType(0),
5950 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5951 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5952 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5953 return;
5954 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5956 Node->getValueType(0),
5957 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5958 AArch64::FSCALE_2ZZ_D}))
5959 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5960 return;
5961 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5963 Node->getValueType(0),
5964 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5965 AArch64::FSCALE_4ZZ_D}))
5966 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5967 return;
5968 case Intrinsic::aarch64_sme_fp8_scale_x2:
5970 Node->getValueType(0),
5971 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5972 AArch64::FSCALE_2Z2Z_D}))
5973 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5974 return;
5975 case Intrinsic::aarch64_sme_fp8_scale_x4:
5977 Node->getValueType(0),
5978 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5979 AArch64::FSCALE_4Z4Z_D}))
5980 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5981 return;
5982 case Intrinsic::aarch64_sve_whilege_x2:
5984 Node->getValueType(0),
5985 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5986 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5987 SelectWhilePair(Node, Op);
5988 return;
5989 case Intrinsic::aarch64_sve_whilegt_x2:
5991 Node->getValueType(0),
5992 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5993 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5994 SelectWhilePair(Node, Op);
5995 return;
5996 case Intrinsic::aarch64_sve_whilehi_x2:
5998 Node->getValueType(0),
5999 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6000 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6001 SelectWhilePair(Node, Op);
6002 return;
6003 case Intrinsic::aarch64_sve_whilehs_x2:
6005 Node->getValueType(0),
6006 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6007 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6008 SelectWhilePair(Node, Op);
6009 return;
6010 case Intrinsic::aarch64_sve_whilele_x2:
6012 Node->getValueType(0),
6013 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6014 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6015 SelectWhilePair(Node, Op);
6016 return;
6017 case Intrinsic::aarch64_sve_whilelo_x2:
6019 Node->getValueType(0),
6020 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6021 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6022 SelectWhilePair(Node, Op);
6023 return;
6024 case Intrinsic::aarch64_sve_whilels_x2:
6026 Node->getValueType(0),
6027 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6028 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6029 SelectWhilePair(Node, Op);
6030 return;
6031 case Intrinsic::aarch64_sve_whilelt_x2:
6033 Node->getValueType(0),
6034 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6035 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6036 SelectWhilePair(Node, Op);
6037 return;
6038 case Intrinsic::aarch64_sve_smax_single_x2:
6040 Node->getValueType(0),
6041 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6042 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6043 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6044 return;
6045 case Intrinsic::aarch64_sve_umax_single_x2:
6047 Node->getValueType(0),
6048 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6049 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6050 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6051 return;
6052 case Intrinsic::aarch64_sve_fmax_single_x2:
6054 Node->getValueType(0),
6055 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6056 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6057 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6058 return;
6059 case Intrinsic::aarch64_sve_smax_single_x4:
6061 Node->getValueType(0),
6062 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6063 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6064 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6065 return;
6066 case Intrinsic::aarch64_sve_umax_single_x4:
6068 Node->getValueType(0),
6069 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6070 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6071 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6072 return;
6073 case Intrinsic::aarch64_sve_fmax_single_x4:
6075 Node->getValueType(0),
6076 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6077 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6078 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6079 return;
6080 case Intrinsic::aarch64_sve_smin_single_x2:
6082 Node->getValueType(0),
6083 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6084 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6085 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6086 return;
6087 case Intrinsic::aarch64_sve_umin_single_x2:
6089 Node->getValueType(0),
6090 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6091 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6092 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6093 return;
6094 case Intrinsic::aarch64_sve_fmin_single_x2:
6096 Node->getValueType(0),
6097 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6098 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6099 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6100 return;
6101 case Intrinsic::aarch64_sve_smin_single_x4:
6103 Node->getValueType(0),
6104 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6105 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6106 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6107 return;
6108 case Intrinsic::aarch64_sve_umin_single_x4:
6110 Node->getValueType(0),
6111 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6112 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6113 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6114 return;
6115 case Intrinsic::aarch64_sve_fmin_single_x4:
6117 Node->getValueType(0),
6118 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6119 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6120 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6121 return;
6122 case Intrinsic::aarch64_sve_smax_x2:
6124 Node->getValueType(0),
6125 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6126 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6127 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6128 return;
6129 case Intrinsic::aarch64_sve_umax_x2:
6131 Node->getValueType(0),
6132 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6133 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6134 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6135 return;
6136 case Intrinsic::aarch64_sve_fmax_x2:
6138 Node->getValueType(0),
6139 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6140 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6141 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6142 return;
6143 case Intrinsic::aarch64_sve_smax_x4:
6145 Node->getValueType(0),
6146 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6147 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6148 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6149 return;
6150 case Intrinsic::aarch64_sve_umax_x4:
6152 Node->getValueType(0),
6153 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6154 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6155 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6156 return;
6157 case Intrinsic::aarch64_sve_fmax_x4:
6159 Node->getValueType(0),
6160 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6161 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6162 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6163 return;
6164 case Intrinsic::aarch64_sme_famax_x2:
6166 Node->getValueType(0),
6167 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6168 AArch64::FAMAX_2Z2Z_D}))
6169 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6170 return;
6171 case Intrinsic::aarch64_sme_famax_x4:
6173 Node->getValueType(0),
6174 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6175 AArch64::FAMAX_4Z4Z_D}))
6176 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6177 return;
6178 case Intrinsic::aarch64_sme_famin_x2:
6180 Node->getValueType(0),
6181 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6182 AArch64::FAMIN_2Z2Z_D}))
6183 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6184 return;
6185 case Intrinsic::aarch64_sme_famin_x4:
6187 Node->getValueType(0),
6188 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6189 AArch64::FAMIN_4Z4Z_D}))
6190 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6191 return;
6192 case Intrinsic::aarch64_sve_smin_x2:
6194 Node->getValueType(0),
6195 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6196 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6197 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6198 return;
6199 case Intrinsic::aarch64_sve_umin_x2:
6201 Node->getValueType(0),
6202 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6203 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6204 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6205 return;
6206 case Intrinsic::aarch64_sve_fmin_x2:
6208 Node->getValueType(0),
6209 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6210 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6211 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6212 return;
6213 case Intrinsic::aarch64_sve_smin_x4:
6215 Node->getValueType(0),
6216 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6217 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6218 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6219 return;
6220 case Intrinsic::aarch64_sve_umin_x4:
6222 Node->getValueType(0),
6223 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6224 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6225 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6226 return;
6227 case Intrinsic::aarch64_sve_fmin_x4:
6229 Node->getValueType(0),
6230 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6231 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6232 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6233 return;
6234 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6236 Node->getValueType(0),
6237 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6238 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6239 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6240 return;
6241 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6243 Node->getValueType(0),
6244 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6245 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6246 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6247 return;
6248 case Intrinsic::aarch64_sve_fminnm_single_x2:
6250 Node->getValueType(0),
6251 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6252 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6253 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6254 return;
6255 case Intrinsic::aarch64_sve_fminnm_single_x4:
6257 Node->getValueType(0),
6258 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6259 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6260 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6261 return;
6262 case Intrinsic::aarch64_sve_fscale_single_x4:
6263 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6264 return;
6265 case Intrinsic::aarch64_sve_fscale_single_x2:
6266 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6267 return;
6268 case Intrinsic::aarch64_sve_fmul_single_x4:
6270 Node->getValueType(0),
6271 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6272 AArch64::FMUL_4ZZ_D}))
6273 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6274 return;
6275 case Intrinsic::aarch64_sve_fmul_single_x2:
6277 Node->getValueType(0),
6278 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6279 AArch64::FMUL_2ZZ_D}))
6280 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6281 return;
6282 case Intrinsic::aarch64_sve_fmaxnm_x2:
6284 Node->getValueType(0),
6285 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6286 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6287 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6288 return;
6289 case Intrinsic::aarch64_sve_fmaxnm_x4:
6291 Node->getValueType(0),
6292 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6293 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6294 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6295 return;
6296 case Intrinsic::aarch64_sve_fminnm_x2:
6298 Node->getValueType(0),
6299 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6300 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6301 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6302 return;
6303 case Intrinsic::aarch64_sve_fminnm_x4:
6305 Node->getValueType(0),
6306 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6307 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6308 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6309 return;
6310 case Intrinsic::aarch64_sve_aese_lane_x2:
6311 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6312 return;
6313 case Intrinsic::aarch64_sve_aesd_lane_x2:
6314 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6315 return;
6316 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6317 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6318 return;
6319 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6320 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6321 return;
6322 case Intrinsic::aarch64_sve_aese_lane_x4:
6323 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6324 return;
6325 case Intrinsic::aarch64_sve_aesd_lane_x4:
6326 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6327 return;
6328 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6329 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6330 return;
6331 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6332 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6333 return;
6334 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6335 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6336 return;
6337 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6338 SDLoc DL(Node);
6339 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6340 SDNode *Res =
6341 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6342 SDValue SuperReg = SDValue(Res, 0);
6343 for (unsigned I = 0; I < 2; I++)
6344 ReplaceUses(SDValue(Node, I),
6345 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6346 SuperReg));
6347 CurDAG->RemoveDeadNode(Node);
6348 return;
6349 }
6350 case Intrinsic::aarch64_sve_fscale_x4:
6351 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6352 return;
6353 case Intrinsic::aarch64_sve_fscale_x2:
6354 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6355 return;
6356 case Intrinsic::aarch64_sve_fmul_x4:
6358 Node->getValueType(0),
6359 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6360 AArch64::FMUL_4Z4Z_D}))
6361 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6362 return;
6363 case Intrinsic::aarch64_sve_fmul_x2:
6365 Node->getValueType(0),
6366 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6367 AArch64::FMUL_2Z2Z_D}))
6368 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6369 return;
6370 case Intrinsic::aarch64_sve_fcvtzs_x2:
6371 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6372 return;
6373 case Intrinsic::aarch64_sve_scvtf_x2:
6374 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6375 return;
6376 case Intrinsic::aarch64_sve_fcvtzu_x2:
6377 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6378 return;
6379 case Intrinsic::aarch64_sve_ucvtf_x2:
6380 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6381 return;
6382 case Intrinsic::aarch64_sve_fcvtzs_x4:
6383 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6384 return;
6385 case Intrinsic::aarch64_sve_scvtf_x4:
6386 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6387 return;
6388 case Intrinsic::aarch64_sve_fcvtzu_x4:
6389 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6390 return;
6391 case Intrinsic::aarch64_sve_ucvtf_x4:
6392 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6393 return;
6394 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6395 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6396 return;
6397 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6398 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6399 return;
6400 case Intrinsic::aarch64_sve_sclamp_single_x2:
6402 Node->getValueType(0),
6403 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6404 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6405 SelectClamp(Node, 2, Op);
6406 return;
6407 case Intrinsic::aarch64_sve_uclamp_single_x2:
6409 Node->getValueType(0),
6410 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6411 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6412 SelectClamp(Node, 2, Op);
6413 return;
6414 case Intrinsic::aarch64_sve_fclamp_single_x2:
6416 Node->getValueType(0),
6417 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6418 AArch64::FCLAMP_VG2_2Z2Z_D}))
6419 SelectClamp(Node, 2, Op);
6420 return;
6421 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6422 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6423 return;
6424 case Intrinsic::aarch64_sve_sclamp_single_x4:
6426 Node->getValueType(0),
6427 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6428 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6429 SelectClamp(Node, 4, Op);
6430 return;
6431 case Intrinsic::aarch64_sve_uclamp_single_x4:
6433 Node->getValueType(0),
6434 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6435 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6436 SelectClamp(Node, 4, Op);
6437 return;
6438 case Intrinsic::aarch64_sve_fclamp_single_x4:
6440 Node->getValueType(0),
6441 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6442 AArch64::FCLAMP_VG4_4Z4Z_D}))
6443 SelectClamp(Node, 4, Op);
6444 return;
6445 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6446 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6447 return;
6448 case Intrinsic::aarch64_sve_add_single_x2:
6450 Node->getValueType(0),
6451 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6452 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6453 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6454 return;
6455 case Intrinsic::aarch64_sve_add_single_x4:
6457 Node->getValueType(0),
6458 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6459 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6460 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6461 return;
6462 case Intrinsic::aarch64_sve_zip_x2:
6464 Node->getValueType(0),
6465 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6466 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6467 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6468 return;
6469 case Intrinsic::aarch64_sve_zipq_x2:
6470 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6471 AArch64::ZIP_VG2_2ZZZ_Q);
6472 return;
6473 case Intrinsic::aarch64_sve_zip_x4:
6475 Node->getValueType(0),
6476 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6477 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6478 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6479 return;
6480 case Intrinsic::aarch64_sve_zipq_x4:
6481 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6482 AArch64::ZIP_VG4_4Z4Z_Q);
6483 return;
6484 case Intrinsic::aarch64_sve_uzp_x2:
6486 Node->getValueType(0),
6487 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6488 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6489 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6490 return;
6491 case Intrinsic::aarch64_sve_uzpq_x2:
6492 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6493 AArch64::UZP_VG2_2ZZZ_Q);
6494 return;
6495 case Intrinsic::aarch64_sve_uzp_x4:
6497 Node->getValueType(0),
6498 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6499 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6500 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6501 return;
6502 case Intrinsic::aarch64_sve_uzpq_x4:
6503 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6504 AArch64::UZP_VG4_4Z4Z_Q);
6505 return;
6506 case Intrinsic::aarch64_sve_sel_x2:
6508 Node->getValueType(0),
6509 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6510 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6511 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6512 return;
6513 case Intrinsic::aarch64_sve_sel_x4:
6515 Node->getValueType(0),
6516 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6517 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6518 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6519 return;
6520 case Intrinsic::aarch64_sve_frinta_x2:
6521 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6522 return;
6523 case Intrinsic::aarch64_sve_frinta_x4:
6524 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6525 return;
6526 case Intrinsic::aarch64_sve_frintm_x2:
6527 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6528 return;
6529 case Intrinsic::aarch64_sve_frintm_x4:
6530 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6531 return;
6532 case Intrinsic::aarch64_sve_frintn_x2:
6533 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6534 return;
6535 case Intrinsic::aarch64_sve_frintn_x4:
6536 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6537 return;
6538 case Intrinsic::aarch64_sve_frintp_x2:
6539 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6540 return;
6541 case Intrinsic::aarch64_sve_frintp_x4:
6542 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6543 return;
6544 case Intrinsic::aarch64_sve_sunpk_x2:
6546 Node->getValueType(0),
6547 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6548 AArch64::SUNPK_VG2_2ZZ_D}))
6549 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6550 return;
6551 case Intrinsic::aarch64_sve_uunpk_x2:
6553 Node->getValueType(0),
6554 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6555 AArch64::UUNPK_VG2_2ZZ_D}))
6556 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6557 return;
6558 case Intrinsic::aarch64_sve_sunpk_x4:
6560 Node->getValueType(0),
6561 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6562 AArch64::SUNPK_VG4_4Z2Z_D}))
6563 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6564 return;
6565 case Intrinsic::aarch64_sve_uunpk_x4:
6567 Node->getValueType(0),
6568 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6569 AArch64::UUNPK_VG4_4Z2Z_D}))
6570 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6571 return;
6572 case Intrinsic::aarch64_sve_pext_x2: {
6574 Node->getValueType(0),
6575 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6576 AArch64::PEXT_2PCI_D}))
6577 SelectPExtPair(Node, Op);
6578 return;
6579 }
6580 }
6581 break;
6582 }
6583 case ISD::INTRINSIC_VOID: {
6584 unsigned IntNo = Node->getConstantOperandVal(1);
6585 if (Node->getNumOperands() >= 3)
6586 VT = Node->getOperand(2)->getValueType(0);
6587 switch (IntNo) {
6588 default:
6589 break;
6590 case Intrinsic::aarch64_neon_st1x2: {
6591 if (VT == MVT::v8i8) {
6592 SelectStore(Node, 2, AArch64::ST1Twov8b);
6593 return;
6594 } else if (VT == MVT::v16i8) {
6595 SelectStore(Node, 2, AArch64::ST1Twov16b);
6596 return;
6597 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6598 VT == MVT::v4bf16) {
6599 SelectStore(Node, 2, AArch64::ST1Twov4h);
6600 return;
6601 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6602 VT == MVT::v8bf16) {
6603 SelectStore(Node, 2, AArch64::ST1Twov8h);
6604 return;
6605 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6606 SelectStore(Node, 2, AArch64::ST1Twov2s);
6607 return;
6608 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6609 SelectStore(Node, 2, AArch64::ST1Twov4s);
6610 return;
6611 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6612 SelectStore(Node, 2, AArch64::ST1Twov2d);
6613 return;
6614 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6615 SelectStore(Node, 2, AArch64::ST1Twov1d);
6616 return;
6617 }
6618 break;
6619 }
6620 case Intrinsic::aarch64_neon_st1x3: {
6621 if (VT == MVT::v8i8) {
6622 SelectStore(Node, 3, AArch64::ST1Threev8b);
6623 return;
6624 } else if (VT == MVT::v16i8) {
6625 SelectStore(Node, 3, AArch64::ST1Threev16b);
6626 return;
6627 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6628 VT == MVT::v4bf16) {
6629 SelectStore(Node, 3, AArch64::ST1Threev4h);
6630 return;
6631 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6632 VT == MVT::v8bf16) {
6633 SelectStore(Node, 3, AArch64::ST1Threev8h);
6634 return;
6635 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6636 SelectStore(Node, 3, AArch64::ST1Threev2s);
6637 return;
6638 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6639 SelectStore(Node, 3, AArch64::ST1Threev4s);
6640 return;
6641 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6642 SelectStore(Node, 3, AArch64::ST1Threev2d);
6643 return;
6644 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6645 SelectStore(Node, 3, AArch64::ST1Threev1d);
6646 return;
6647 }
6648 break;
6649 }
6650 case Intrinsic::aarch64_neon_st1x4: {
6651 if (VT == MVT::v8i8) {
6652 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6653 return;
6654 } else if (VT == MVT::v16i8) {
6655 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6656 return;
6657 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6658 VT == MVT::v4bf16) {
6659 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6660 return;
6661 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6662 VT == MVT::v8bf16) {
6663 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6664 return;
6665 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6666 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6667 return;
6668 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6669 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6670 return;
6671 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6672 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6673 return;
6674 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6675 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6676 return;
6677 }
6678 break;
6679 }
6680 case Intrinsic::aarch64_neon_st2: {
6681 if (VT == MVT::v8i8) {
6682 SelectStore(Node, 2, AArch64::ST2Twov8b);
6683 return;
6684 } else if (VT == MVT::v16i8) {
6685 SelectStore(Node, 2, AArch64::ST2Twov16b);
6686 return;
6687 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6688 VT == MVT::v4bf16) {
6689 SelectStore(Node, 2, AArch64::ST2Twov4h);
6690 return;
6691 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6692 VT == MVT::v8bf16) {
6693 SelectStore(Node, 2, AArch64::ST2Twov8h);
6694 return;
6695 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6696 SelectStore(Node, 2, AArch64::ST2Twov2s);
6697 return;
6698 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6699 SelectStore(Node, 2, AArch64::ST2Twov4s);
6700 return;
6701 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6702 SelectStore(Node, 2, AArch64::ST2Twov2d);
6703 return;
6704 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6705 SelectStore(Node, 2, AArch64::ST1Twov1d);
6706 return;
6707 }
6708 break;
6709 }
6710 case Intrinsic::aarch64_neon_st3: {
6711 if (VT == MVT::v8i8) {
6712 SelectStore(Node, 3, AArch64::ST3Threev8b);
6713 return;
6714 } else if (VT == MVT::v16i8) {
6715 SelectStore(Node, 3, AArch64::ST3Threev16b);
6716 return;
6717 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6718 VT == MVT::v4bf16) {
6719 SelectStore(Node, 3, AArch64::ST3Threev4h);
6720 return;
6721 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6722 VT == MVT::v8bf16) {
6723 SelectStore(Node, 3, AArch64::ST3Threev8h);
6724 return;
6725 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6726 SelectStore(Node, 3, AArch64::ST3Threev2s);
6727 return;
6728 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6729 SelectStore(Node, 3, AArch64::ST3Threev4s);
6730 return;
6731 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6732 SelectStore(Node, 3, AArch64::ST3Threev2d);
6733 return;
6734 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6735 SelectStore(Node, 3, AArch64::ST1Threev1d);
6736 return;
6737 }
6738 break;
6739 }
6740 case Intrinsic::aarch64_neon_st4: {
6741 if (VT == MVT::v8i8) {
6742 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6743 return;
6744 } else if (VT == MVT::v16i8) {
6745 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6746 return;
6747 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6748 VT == MVT::v4bf16) {
6749 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6750 return;
6751 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6752 VT == MVT::v8bf16) {
6753 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6754 return;
6755 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6756 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6757 return;
6758 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6759 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6760 return;
6761 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6762 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6763 return;
6764 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6765 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6766 return;
6767 }
6768 break;
6769 }
6770 case Intrinsic::aarch64_neon_st2lane: {
6771 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6772 SelectStoreLane(Node, 2, AArch64::ST2i8);
6773 return;
6774 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6775 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6776 SelectStoreLane(Node, 2, AArch64::ST2i16);
6777 return;
6778 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6779 VT == MVT::v2f32) {
6780 SelectStoreLane(Node, 2, AArch64::ST2i32);
6781 return;
6782 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6783 VT == MVT::v1f64) {
6784 SelectStoreLane(Node, 2, AArch64::ST2i64);
6785 return;
6786 }
6787 break;
6788 }
6789 case Intrinsic::aarch64_neon_st3lane: {
6790 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6791 SelectStoreLane(Node, 3, AArch64::ST3i8);
6792 return;
6793 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6794 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6795 SelectStoreLane(Node, 3, AArch64::ST3i16);
6796 return;
6797 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6798 VT == MVT::v2f32) {
6799 SelectStoreLane(Node, 3, AArch64::ST3i32);
6800 return;
6801 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6802 VT == MVT::v1f64) {
6803 SelectStoreLane(Node, 3, AArch64::ST3i64);
6804 return;
6805 }
6806 break;
6807 }
6808 case Intrinsic::aarch64_neon_st4lane: {
6809 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6810 SelectStoreLane(Node, 4, AArch64::ST4i8);
6811 return;
6812 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6813 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6814 SelectStoreLane(Node, 4, AArch64::ST4i16);
6815 return;
6816 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6817 VT == MVT::v2f32) {
6818 SelectStoreLane(Node, 4, AArch64::ST4i32);
6819 return;
6820 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6821 VT == MVT::v1f64) {
6822 SelectStoreLane(Node, 4, AArch64::ST4i64);
6823 return;
6824 }
6825 break;
6826 }
6827 case Intrinsic::aarch64_sve_st2q: {
6828 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6829 return;
6830 }
6831 case Intrinsic::aarch64_sve_st3q: {
6832 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6833 return;
6834 }
6835 case Intrinsic::aarch64_sve_st4q: {
6836 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6837 return;
6838 }
6839 case Intrinsic::aarch64_sve_st2: {
6840 if (VT == MVT::nxv16i8) {
6841 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6842 return;
6843 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6844 VT == MVT::nxv8bf16) {
6845 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6846 return;
6847 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6848 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6849 return;
6850 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6851 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6852 return;
6853 }
6854 break;
6855 }
6856 case Intrinsic::aarch64_sve_st3: {
6857 if (VT == MVT::nxv16i8) {
6858 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6859 return;
6860 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6861 VT == MVT::nxv8bf16) {
6862 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6863 return;
6864 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6865 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6866 return;
6867 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6868 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6869 return;
6870 }
6871 break;
6872 }
6873 case Intrinsic::aarch64_sve_st4: {
6874 if (VT == MVT::nxv16i8) {
6875 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6876 return;
6877 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6878 VT == MVT::nxv8bf16) {
6879 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6880 return;
6881 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6882 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6883 return;
6884 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6885 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6886 return;
6887 }
6888 break;
6889 }
6890 }
6891 break;
6892 }
6893 case AArch64ISD::LD2post: {
6894 if (VT == MVT::v8i8) {
6895 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6896 return;
6897 } else if (VT == MVT::v16i8) {
6898 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6899 return;
6900 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6901 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6902 return;
6903 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6904 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6905 return;
6906 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6907 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6908 return;
6909 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6910 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6911 return;
6912 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6913 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6914 return;
6915 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6916 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6917 return;
6918 }
6919 break;
6920 }
6921 case AArch64ISD::LD3post: {
6922 if (VT == MVT::v8i8) {
6923 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6924 return;
6925 } else if (VT == MVT::v16i8) {
6926 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6927 return;
6928 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6929 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6930 return;
6931 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6932 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6933 return;
6934 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6935 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6936 return;
6937 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6938 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6939 return;
6940 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6941 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6942 return;
6943 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6944 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6945 return;
6946 }
6947 break;
6948 }
6949 case AArch64ISD::LD4post: {
6950 if (VT == MVT::v8i8) {
6951 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6952 return;
6953 } else if (VT == MVT::v16i8) {
6954 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6955 return;
6956 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6957 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6958 return;
6959 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6960 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6961 return;
6962 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6963 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6964 return;
6965 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6966 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6967 return;
6968 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6969 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6970 return;
6971 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6972 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6973 return;
6974 }
6975 break;
6976 }
6977 case AArch64ISD::LD1x2post: {
6978 if (VT == MVT::v8i8) {
6979 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6980 return;
6981 } else if (VT == MVT::v16i8) {
6982 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6983 return;
6984 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6985 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6986 return;
6987 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6988 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6989 return;
6990 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6991 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6992 return;
6993 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6994 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6995 return;
6996 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6997 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6998 return;
6999 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7000 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7001 return;
7002 }
7003 break;
7004 }
7005 case AArch64ISD::LD1x3post: {
7006 if (VT == MVT::v8i8) {
7007 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7008 return;
7009 } else if (VT == MVT::v16i8) {
7010 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7011 return;
7012 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7013 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7014 return;
7015 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7016 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7017 return;
7018 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7019 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7020 return;
7021 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7022 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7023 return;
7024 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7025 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7026 return;
7027 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7028 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7029 return;
7030 }
7031 break;
7032 }
7033 case AArch64ISD::LD1x4post: {
7034 if (VT == MVT::v8i8) {
7035 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7036 return;
7037 } else if (VT == MVT::v16i8) {
7038 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7039 return;
7040 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7041 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7042 return;
7043 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7044 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7045 return;
7046 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7047 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7048 return;
7049 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7050 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7051 return;
7052 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7053 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7054 return;
7055 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7056 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7057 return;
7058 }
7059 break;
7060 }
7061 case AArch64ISD::LD1DUPpost: {
7062 if (VT == MVT::v8i8) {
7063 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7064 return;
7065 } else if (VT == MVT::v16i8) {
7066 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7067 return;
7068 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7069 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7070 return;
7071 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7072 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7073 return;
7074 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7075 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7076 return;
7077 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7078 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7079 return;
7080 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7081 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7082 return;
7083 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7084 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7085 return;
7086 }
7087 break;
7088 }
7089 case AArch64ISD::LD2DUPpost: {
7090 if (VT == MVT::v8i8) {
7091 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7092 return;
7093 } else if (VT == MVT::v16i8) {
7094 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7095 return;
7096 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7097 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7098 return;
7099 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7100 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7101 return;
7102 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7103 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7104 return;
7105 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7106 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7107 return;
7108 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7109 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7110 return;
7111 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7112 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7113 return;
7114 }
7115 break;
7116 }
7117 case AArch64ISD::LD3DUPpost: {
7118 if (VT == MVT::v8i8) {
7119 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7120 return;
7121 } else if (VT == MVT::v16i8) {
7122 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7123 return;
7124 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7125 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7126 return;
7127 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7128 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7129 return;
7130 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7131 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7132 return;
7133 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7134 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7135 return;
7136 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7137 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7138 return;
7139 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7140 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7141 return;
7142 }
7143 break;
7144 }
7145 case AArch64ISD::LD4DUPpost: {
7146 if (VT == MVT::v8i8) {
7147 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7148 return;
7149 } else if (VT == MVT::v16i8) {
7150 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7151 return;
7152 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7153 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7154 return;
7155 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7156 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7157 return;
7158 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7159 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7160 return;
7161 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7162 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7163 return;
7164 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7165 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7166 return;
7167 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7168 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7169 return;
7170 }
7171 break;
7172 }
7173 case AArch64ISD::LD1LANEpost: {
7174 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7175 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7176 return;
7177 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7178 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7179 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7180 return;
7181 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7182 VT == MVT::v2f32) {
7183 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7184 return;
7185 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7186 VT == MVT::v1f64) {
7187 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7188 return;
7189 }
7190 break;
7191 }
7192 case AArch64ISD::LD2LANEpost: {
7193 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7194 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7195 return;
7196 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7197 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7198 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7199 return;
7200 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7201 VT == MVT::v2f32) {
7202 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7203 return;
7204 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7205 VT == MVT::v1f64) {
7206 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7207 return;
7208 }
7209 break;
7210 }
7211 case AArch64ISD::LD3LANEpost: {
7212 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7213 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7214 return;
7215 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7216 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7217 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7218 return;
7219 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7220 VT == MVT::v2f32) {
7221 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7222 return;
7223 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7224 VT == MVT::v1f64) {
7225 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7226 return;
7227 }
7228 break;
7229 }
7230 case AArch64ISD::LD4LANEpost: {
7231 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7232 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7233 return;
7234 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7235 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7236 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7237 return;
7238 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7239 VT == MVT::v2f32) {
7240 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7241 return;
7242 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7243 VT == MVT::v1f64) {
7244 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7245 return;
7246 }
7247 break;
7248 }
7249 case AArch64ISD::ST2post: {
7250 VT = Node->getOperand(1).getValueType();
7251 if (VT == MVT::v8i8) {
7252 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7253 return;
7254 } else if (VT == MVT::v16i8) {
7255 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7256 return;
7257 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7258 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7259 return;
7260 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7261 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7262 return;
7263 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7264 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7265 return;
7266 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7267 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7268 return;
7269 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7270 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7271 return;
7272 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7273 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7274 return;
7275 }
7276 break;
7277 }
7278 case AArch64ISD::ST3post: {
7279 VT = Node->getOperand(1).getValueType();
7280 if (VT == MVT::v8i8) {
7281 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7282 return;
7283 } else if (VT == MVT::v16i8) {
7284 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7285 return;
7286 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7287 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7288 return;
7289 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7290 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7291 return;
7292 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7293 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7294 return;
7295 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7296 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7297 return;
7298 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7299 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7300 return;
7301 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7302 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7303 return;
7304 }
7305 break;
7306 }
7307 case AArch64ISD::ST4post: {
7308 VT = Node->getOperand(1).getValueType();
7309 if (VT == MVT::v8i8) {
7310 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7311 return;
7312 } else if (VT == MVT::v16i8) {
7313 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7314 return;
7315 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7316 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7317 return;
7318 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7319 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7320 return;
7321 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7322 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7323 return;
7324 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7325 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7326 return;
7327 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7328 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7329 return;
7330 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7331 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7332 return;
7333 }
7334 break;
7335 }
7336 case AArch64ISD::ST1x2post: {
7337 VT = Node->getOperand(1).getValueType();
7338 if (VT == MVT::v8i8) {
7339 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7340 return;
7341 } else if (VT == MVT::v16i8) {
7342 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7343 return;
7344 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7345 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7346 return;
7347 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7348 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7349 return;
7350 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7351 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7352 return;
7353 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7354 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7355 return;
7356 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7357 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7358 return;
7359 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7360 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7361 return;
7362 }
7363 break;
7364 }
7365 case AArch64ISD::ST1x3post: {
7366 VT = Node->getOperand(1).getValueType();
7367 if (VT == MVT::v8i8) {
7368 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7369 return;
7370 } else if (VT == MVT::v16i8) {
7371 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7372 return;
7373 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7374 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7375 return;
7376 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7377 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7378 return;
7379 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7380 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7381 return;
7382 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7383 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7384 return;
7385 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7386 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7387 return;
7388 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7389 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7390 return;
7391 }
7392 break;
7393 }
7394 case AArch64ISD::ST1x4post: {
7395 VT = Node->getOperand(1).getValueType();
7396 if (VT == MVT::v8i8) {
7397 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7398 return;
7399 } else if (VT == MVT::v16i8) {
7400 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7401 return;
7402 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7403 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7404 return;
7405 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7406 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7407 return;
7408 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7409 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7410 return;
7411 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7412 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7413 return;
7414 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7415 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7416 return;
7417 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7418 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7419 return;
7420 }
7421 break;
7422 }
7423 case AArch64ISD::ST2LANEpost: {
7424 VT = Node->getOperand(1).getValueType();
7425 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7426 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7427 return;
7428 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7429 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7430 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7431 return;
7432 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7433 VT == MVT::v2f32) {
7434 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7435 return;
7436 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7437 VT == MVT::v1f64) {
7438 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7439 return;
7440 }
7441 break;
7442 }
7443 case AArch64ISD::ST3LANEpost: {
7444 VT = Node->getOperand(1).getValueType();
7445 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7446 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7447 return;
7448 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7449 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7450 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7451 return;
7452 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7453 VT == MVT::v2f32) {
7454 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7455 return;
7456 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7457 VT == MVT::v1f64) {
7458 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7459 return;
7460 }
7461 break;
7462 }
7463 case AArch64ISD::ST4LANEpost: {
7464 VT = Node->getOperand(1).getValueType();
7465 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7466 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7467 return;
7468 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7469 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7470 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7471 return;
7472 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7473 VT == MVT::v2f32) {
7474 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7475 return;
7476 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7477 VT == MVT::v1f64) {
7478 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7479 return;
7480 }
7481 break;
7482 }
7483 }
7484
7485 // Select the default instruction
7486 SelectCode(Node);
7487}
7488
7489/// createAArch64ISelDag - This pass converts a legalized DAG into a
7490/// AArch64-specific DAG, ready for instruction scheduling.
// NOTE(review): original line 7491 — the return type and first parameter of
// this function's signature — is missing from this extraction; presumably
// `FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,` --
// confirm against the upstream source.
7492 CodeGenOptLevel OptLevel) {
7493 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7494}
7495
7496/// When \p PredVT is a scalable vector predicate in the form
7497/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7498/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7499/// structured vectors (NumVec >1), the output data type is
7500/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7501/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7502/// EVT.
// NOTE(review): original line 7503, the function header
// `static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT
// PredVT, unsigned NumVec)` (per the declaration index at the end of this
// listing), is missing from this extraction.
7504 unsigned NumVec) {
7505 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
// Return an invalid EVT unless the input is a scalable vector of i1.
7506 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7507 return EVT();
7508
// Only the four legal SVE predicate widths (16/8/4/2 x i1) are accepted.
7509 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7510 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7511 return EVT();
7512
// Choose the element width so that element-count x bits fills one
// SVEBitsPerBlock (128-bit) block, then multiply the element count by
// NumVec for structured (multi-vector) accesses.
7513 ElementCount EC = PredVT.getVectorElementCount();
7514 EVT ScalarVT =
7515 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7516 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7517
7518 return MemVT;
7519}
7520
7521/// Return the EVT of the data associated to a memory operation in \p
7522/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
// NOTE(review): original line 7523, the function header
// `static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)` (per the
// declaration index at the end of this listing), is missing from this
// extraction, as are the `return getPackedVectorTypeFromPredicateType(`
// lead-in lines of the two-line return statements in the intrinsic switch
// further down (original lines 7572/7576/7579/7583/7586/7590/7593).
7524 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7525 return MemIntr->getMemoryVT();
7526
7527 if (isa<MemSDNode>(Root)) {
7528 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7529
// The value moved to/from memory: the loaded result or the stored operand.
7530 EVT DataVT;
7531 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7532 DataVT = Load->getValueType(0);
7533 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7534 DataVT = Load->getValueType(0);
7535 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7536 DataVT = Store->getValue().getValueType();
7537 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7538 DataVT = Store->getValue().getValueType();
7539 else
7540 llvm_unreachable("Unexpected MemSDNode!");
7541
// Keep the in-register element count but take the in-memory element type
// -- presumably to account for extending loads / truncating stores where
// the two differ; confirm against the upstream source.
7542 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7543 }
7544
7545 const unsigned Opcode = Root->getOpcode();
7546 // For custom ISD nodes, we have to look at them individually to extract the
7547 // type of the data moved to/from memory.
7548 switch (Opcode) {
7549 case AArch64ISD::LD1_MERGE_ZERO:
7550 case AArch64ISD::LD1S_MERGE_ZERO:
7551 case AArch64ISD::LDNF1_MERGE_ZERO:
7552 case AArch64ISD::LDNF1S_MERGE_ZERO:
7553 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7554 case AArch64ISD::ST1_PRED:
7555 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7556 default:
7557 break;
7558 }
7559
// Beyond the custom nodes above, only intrinsic nodes carry a memory type.
7560 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7561 return EVT();
7562
// Operand 1 of an intrinsic node is the intrinsic ID.
7563 switch (Root->getConstantOperandVal(1)) {
7564 default:
7565 return EVT();
7566 case Intrinsic::aarch64_sme_ldr:
7567 case Intrinsic::aarch64_sme_str:
7568 return MVT::nxv16i8;
7569 case Intrinsic::aarch64_sve_prf:
7570 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7571 // width of the predicate.
7573 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7574 case Intrinsic::aarch64_sve_ld2_sret:
7575 case Intrinsic::aarch64_sve_ld2q_sret:
7577 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7578 case Intrinsic::aarch64_sve_st2q:
7580 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7581 case Intrinsic::aarch64_sve_ld3_sret:
7582 case Intrinsic::aarch64_sve_ld3q_sret:
7584 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7585 case Intrinsic::aarch64_sve_st3q:
7587 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7588 case Intrinsic::aarch64_sve_ld4_sret:
7589 case Intrinsic::aarch64_sve_ld4q_sret:
7591 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7592 case Intrinsic::aarch64_sve_st4q:
7594 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7595 case Intrinsic::aarch64_sve_ld1udq:
7596 case Intrinsic::aarch64_sve_st1dq:
7597 return EVT(MVT::nxv1i64);
7598 case Intrinsic::aarch64_sve_ld1uwq:
7599 case Intrinsic::aarch64_sve_st1wq:
7600 return EVT(MVT::nxv1i32);
7601 }
7602}
7603
7604/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7605/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7606/// where Root is the memory access using N for its address.
7607template <int64_t Min, int64_t Max>
7608bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7609 SDValue &Base,
7610 SDValue &OffImm) {
7611 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7612 const DataLayout &DL = CurDAG->getDataLayout();
7613 const MachineFrameInfo &MFI = MF->getFrameInfo();
7614
// A bare frame index matches with an offset of zero.
7615 if (N.getOpcode() == ISD::FrameIndex) {
7616 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7617 // We can only encode VL scaled offsets, so only fold in frame indexes
7618 // referencing SVE objects.
7619 if (MFI.hasScalableStackID(FI)) {
7620 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7621 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7622 return true;
7623 }
7624
7625 return false;
7626 }
7627
// Without a known memory type the offset cannot be scaled.
7628 if (MemVT == EVT())
7629 return false;
7630
7631 if (N.getOpcode() != ISD::ADD)
7632 return false;
7633
// The addend is either (vscale * C), or a plain constant byte offset that
// divides evenly by a statically-known vscale.
7634 SDValue VScale = N.getOperand(1);
7635 int64_t MulImm = std::numeric_limits<int64_t>::max();
7636 if (VScale.getOpcode() == ISD::VSCALE) {
7637 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7638 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7639 int64_t ByteOffset = C->getSExtValue();
7640 const auto KnownVScale =
// NOTE(review): original line 7641, the initializer of KnownVScale, is
// missing from this extraction -- presumably the subtarget's known vscale
// (e.g. derived from getSVEVectorSizeInBits()); confirm upstream.
7642
7643 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7644 return false;
7645
7646 MulImm = ByteOffset / KnownVScale;
7647 } else
7648 return false;
7649
// Scale the multiplier down to units of the access width (bytes).
7650 TypeSize TS = MemVT.getSizeInBits();
7651 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7652
7653 if ((MulImm % MemWidthBytes) != 0)
7654 return false;
7655
7656 int64_t Offset = MulImm / MemWidthBytes;
// NOTE(review): original line 7657 is missing from this extraction --
// presumably the range check `if (Offset < Min || Offset > Max)` guarding
// the `return false` below; confirm upstream.
7658 return false;
7659
7660 Base = N.getOperand(0);
7661 if (Base.getOpcode() == ISD::FrameIndex) {
7662 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7663 // We can only encode VL scaled offsets, so only fold in frame indexes
7664 // referencing SVE objects.
7665 if (MFI.hasScalableStackID(FI))
7666 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7667 }
7668
7669 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7670 return true;
7671}
7672
7673/// Select register plus register addressing mode for SVE, with scaled
7674/// offset.
7675bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7676 SDValue &Base,
7677 SDValue &Offset) {
7678 if (N.getOpcode() != ISD::ADD)
7679 return false;
7680
7681 // Process an ADD node.
7682 const SDValue LHS = N.getOperand(0);
7683 const SDValue RHS = N.getOperand(1);
7684
7685 // 8 bit data does not come with the SHL node, so it is treated
7686 // separately.
7687 if (Scale == 0) {
7688 Base = LHS;
7689 Offset = RHS;
7690 return true;
7691 }
7692
7693 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7694 int64_t ImmOff = C->getSExtValue();
7695 unsigned Size = 1 << Scale;
7696
7697 // To use the reg+reg addressing mode, the immediate must be a multiple of
7698 // the vector element's byte size.
7699 if (ImmOff % Size)
7700 return false;
7701
7702 SDLoc DL(N);
7703 Base = LHS;
7704 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7705 SDValue Ops[] = {Offset};
7706 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7707 Offset = SDValue(MI, 0);
7708 return true;
7709 }
7710
7711 // Check if the RHS is a shift node with a constant.
7712 if (RHS.getOpcode() != ISD::SHL)
7713 return false;
7714
7715 const SDValue ShiftRHS = RHS.getOperand(1);
7716 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7717 if (C->getZExtValue() == Scale) {
7718 Base = LHS;
7719 Offset = RHS.getOperand(0);
7720 return true;
7721 }
7722
7723 return false;
7724}
7725
7726bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7727 const AArch64TargetLowering *TLI =
7728 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7729
7730 return TLI->isAllActivePredicate(*CurDAG, N);
7731}
7732
7733bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7734 EVT VT = N.getValueType();
7735 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7736}
7737
7738bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
// NOTE(review): original line 7739 -- the middle of the parameter list,
// presumably `SDValue &Base, SDValue &Offset,` since both are assigned
// below -- is missing from this extraction; confirm upstream.
7740 unsigned Scale) {
// Yields the target constant ImmOff/Scale when CN is a constant in
// (0, MaxSize] that is a multiple of Scale; otherwise an empty SDValue.
7741 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7742 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7743 int64_t ImmOff = C->getSExtValue();
7744 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7745 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7746 }
7747 return SDValue();
7748 };
7749
// A bare constant slice: zero base register plus the scaled offset.
7750 if (SDValue C = MatchConstantOffset(N)) {
7751 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7752 Offset = C;
7753 return true;
7754 }
7755
7756 // Try to untangle an ADD node into a 'reg + offset'
7757 if (CurDAG->isBaseWithConstantOffset(N)) {
7758 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7759 Base = N.getOperand(0);
7760 Offset = C;
7761 return true;
7762 }
7763 }
7764
7765 // By default, just match reg + 0.
7766 Base = N;
7767 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7768 return true;
7769}
7770
7771bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7772 SDValue &Imm) {
// NOTE(review): original line 7773 is missing from this extraction --
// presumably `AArch64CC::CondCode CC =`, since the cast below otherwise
// discards its result and CC is switched on later; confirm upstream.
7774 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7775 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7776 // Check conservatively if the immediate fits the valid range [0, 64).
7777 // Immediate variants for GE and HS definitely need to be decremented
7778 // when lowering the pseudos later, so an immediate of 1 would become 0.
7779 // For the inverse conditions LT and LO we don't know for sure if they
7780 // will need a decrement but should the decision be made to reverse the
7781 // branch condition, we again end up with the need to decrement.
7782 // The same argument holds for LE, LS, GT and HI and possibly
7783 // incremented immediates. This can lead to slightly less optimal
7784 // codegen, e.g. we never codegen the legal case
7785 // cblt w0, #63, A
7786 // because we could end up with the illegal case
7787 // cbge w0, #64, B
7788 // should the decision to reverse the branch direction be made. For the
7789 // lower bound cases this is no problem since we can express comparisons
7790 // against 0 with either tbz/tbnz or using wzr/xzr.
7791 uint64_t LowerBound = 0, UpperBound = 64;
7792 switch (CC) {
7793 case AArch64CC::GE:
7794 case AArch64CC::HS:
7795 case AArch64CC::LT:
7796 case AArch64CC::LO:
7797 LowerBound = 1;
7798 break;
7799 case AArch64CC::LE:
7800 case AArch64CC::LS:
7801 case AArch64CC::GT:
7802 case AArch64CC::HI:
7803 UpperBound = 63;
7804 break;
7805 default:
7806 break;
7807 }
7808
// Inside the (possibly tightened) bounds: accept the immediate as-is.
7809 if (CN->getAPIntValue().uge(LowerBound) &&
7810 CN->getAPIntValue().ult(UpperBound)) {
7811 SDLoc DL(N);
7812 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType())
7813 return true;
7814 }
7815 }
7816
7817 return false;
7818}
7819
7820template <bool MatchCBB>
7821bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7822 SDValue &ExtType) {
7823
7824 // Use an invalid shift-extend value to indicate we don't need to extend later
7825 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7826 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
// CBB variants match asserted i8 values, otherwise i16 values.
7827 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7828 return false;
7829 Reg = N.getOperand(0);
7830 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7831 SDLoc(N), MVT::i32);
7832 return true;
7833 }
7834
// NOTE(review): original line 7835 is missing from this extraction --
// presumably the declaration/initialization of ET used below, e.g. via
// getExtendTypeForNode(N) (declared in the index at the end of this
// listing); confirm upstream.
7836
7837 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7838 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7839 Reg = N.getOperand(0);
7840 ExtType =
7841 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7842 return true;
7843 }
7844
7845 return false;
7846}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1467
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.