// AArch64ISelDAGToDAG.cpp — source listing from LLVM 23.0.0git
// (exported from the generated documentation; some lines were dropped
// by the export).
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64 void PreprocessISelDAG() override;
65
66 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
67 /// inline asm expressions.
68 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
69 InlineAsm::ConstraintCode ConstraintID,
70 std::vector<SDValue> &OutOps) override;
71
72 template <signed Low, signed High, signed Scale>
73 bool SelectRDVLImm(SDValue N, SDValue &Imm);
74
75 template <signed Low, signed High>
76 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
77
78 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
83 return SelectShiftedRegister(N, false, Reg, Shift);
84 }
85 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
86 return SelectShiftedRegister(N, true, Reg, Shift);
87 }
88 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
93 }
94 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
96 }
97 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
102 }
103 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
105 }
106 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 1, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 2, Base, OffImm);
114 }
115 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeIndexed(N, 4, Base, OffImm);
117 }
118 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeIndexed(N, 8, Base, OffImm);
120 }
121 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeIndexed(N, 16, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
129 }
130 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
131 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
132 }
133 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
134 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
135 }
136 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
137 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
138 }
139 template <unsigned Size, unsigned Max>
140 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
141 // Test if there is an appropriate addressing mode and check if the
142 // immediate fits.
143 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
144 if (Found) {
145 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
146 int64_t C = CI->getSExtValue();
147 if (C <= Max)
148 return true;
149 }
150 }
151
152 // Otherwise, base only, materialize address in register.
153 Base = N;
154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
155 return true;
156 }
157
158 template<int Width>
159 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
160 SDValue &SignExtend, SDValue &DoShift) {
161 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
162 }
163
164 template<int Width>
165 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
166 SDValue &SignExtend, SDValue &DoShift) {
167 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
168 }
169
170 bool SelectExtractHigh(SDValue N, SDValue &Res) {
171 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
172 N = N->getOperand(0);
173 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
174 !isa<ConstantSDNode>(N->getOperand(1)))
175 return false;
176 EVT VT = N->getValueType(0);
177 EVT LVT = N->getOperand(0).getValueType();
178 unsigned Index = N->getConstantOperandVal(1);
179 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
180 Index != VT.getVectorNumElements())
181 return false;
182 Res = N->getOperand(0);
183 return true;
184 }
185
186 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
187 if (N.getOpcode() != AArch64ISD::VLSHR)
188 return false;
189 SDValue Op = N->getOperand(0);
190 EVT VT = Op.getValueType();
191 unsigned ShtAmt = N->getConstantOperandVal(1);
192 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
193 return false;
194
195 APInt Imm;
196 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0)
199 << Op.getOperand(1).getConstantOperandVal(1));
200 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
201 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
202 Imm = APInt(VT.getScalarSizeInBits(),
203 Op.getOperand(1).getConstantOperandVal(0));
204 else
205 return false;
206
207 if (Imm != 1ULL << (ShtAmt - 1))
208 return false;
209
210 Res1 = Op.getOperand(0);
211 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
212 return true;
213 }
214
215 bool SelectDupZeroOrUndef(SDValue N) {
216 switch(N->getOpcode()) {
217 case ISD::UNDEF:
218 return true;
219 case AArch64ISD::DUP:
220 case ISD::SPLAT_VECTOR: {
221 auto Opnd0 = N->getOperand(0);
222 if (isNullConstant(Opnd0))
223 return true;
224 if (isNullFPConstant(Opnd0))
225 return true;
226 break;
227 }
228 default:
229 break;
230 }
231
232 return false;
233 }
234
235 bool SelectAny(SDValue) { return true; }
236
237 bool SelectDupZero(SDValue N) {
238 switch(N->getOpcode()) {
239 case AArch64ISD::DUP:
240 case ISD::SPLAT_VECTOR: {
241 auto Opnd0 = N->getOperand(0);
242 if (isNullConstant(Opnd0))
243 return true;
244 if (isNullFPConstant(Opnd0))
245 return true;
246 break;
247 }
248 }
249
250 return false;
251 }
252
253 template <MVT::SimpleValueType VT, bool Negate>
254 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
255 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
256 }
257
258 template <MVT::SimpleValueType VT, bool Negate>
259 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
260 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
261 }
262
263 template <MVT::SimpleValueType VT>
264 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
265 return SelectSVECpyDupImm(N, VT, Imm, Shift);
266 }
267
268 template <MVT::SimpleValueType VT, bool Invert = false>
269 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
270 return SelectSVELogicalImm(N, VT, Imm, Invert);
271 }
272
273 template <MVT::SimpleValueType VT>
274 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
275 return SelectSVEArithImm(N, VT, Imm);
276 }
277
278 template <unsigned Low, unsigned High, bool AllowSaturation = false>
279 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
280 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
281 }
282
283 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
284 if (N->getOpcode() != ISD::SPLAT_VECTOR)
285 return false;
286
287 EVT EltVT = N->getValueType(0).getVectorElementType();
288 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
289 /* High */ EltVT.getFixedSizeInBits(),
290 /* AllowSaturation */ true, Imm);
291 }
292
293 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
294 template<signed Min, signed Max, signed Scale, bool Shift>
295 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 return false;
298
299 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
300 if (Shift)
301 MulImm = 1LL << MulImm;
302
303 if ((MulImm % std::abs(Scale)) != 0)
304 return false;
305
306 MulImm /= Scale;
307 if ((MulImm >= Min) && (MulImm <= Max)) {
308 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
309 return true;
310 }
311
312 return false;
313 }
314
315 template <signed Max, signed Scale>
316 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 return false;
319
320 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
321
322 if (MulImm >= 0 && MulImm <= Max) {
323 MulImm *= Scale;
324 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
325 return true;
326 }
327
328 return false;
329 }
330
331 template <unsigned BaseReg, unsigned Max>
332 bool ImmToReg(SDValue N, SDValue &Imm) {
333 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
334 uint64_t C = CI->getZExtValue();
335
336 if (C > Max)
337 return false;
338
339 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
340 return true;
341 }
342 return false;
343 }
344
345 /// Form sequences of consecutive 64/128-bit registers for use in NEON
346 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
347 /// between 1 and 4 elements. If it contains a single element that is returned
348 /// unchanged; otherwise a REG_SEQUENCE value is returned.
351 // Form a sequence of SVE registers for instructions using list of vectors,
352 // e.g. structured loads and stores (ldN, stN).
353 SDValue createZTuple(ArrayRef<SDValue> Vecs);
354
355 // Similar to above, except the register must start at a multiple of the
356 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
357 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
358
359 /// Generic helper for the createDTuple/createQTuple
360 /// functions. Those should almost always be called instead.
361 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
362 const unsigned SubRegs[]);
363
364 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
365
366 bool tryIndexedLoad(SDNode *N);
367
368 void SelectPtrauthAuth(SDNode *N);
369 void SelectPtrauthResign(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
416 unsigned Opc, uint32_t MaxImm);
417
418 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
419
420 template <unsigned MaxIdx, unsigned Scale>
421 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
422 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
423 }
424
425 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
430 unsigned Opc_rr, unsigned Opc_ri);
431 std::tuple<unsigned, SDValue, SDValue>
432 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
433 const SDValue &OldBase, const SDValue &OldOffset,
434 unsigned Scale);
435
436 bool tryBitfieldExtractOp(SDNode *N);
437 bool tryBitfieldExtractOpFromSExt(SDNode *N);
438 bool tryBitfieldInsertOp(SDNode *N);
439 bool tryBitfieldInsertInZeroOp(SDNode *N);
440 bool tryShiftAmountMod(SDNode *N);
441
442 bool tryReadRegister(SDNode *N);
443 bool tryWriteRegister(SDNode *N);
444
445 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
446 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
447
448 bool trySelectXAR(SDNode *N);
449
450// Include the pieces autogenerated from the target description.
451#include "AArch64GenDAGISel.inc"
452
453private:
454 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
455 SDValue &Shift);
456 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
457 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
458 SDValue &OffImm) {
459 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
460 }
461 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
462 unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &OffImm);
468 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
469 SDValue &Offset, SDValue &SignExtend,
470 SDValue &DoShift);
471 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
472 SDValue &Offset, SDValue &SignExtend,
473 SDValue &DoShift);
474 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
475 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
476 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
477 SDValue &Offset, SDValue &SignExtend);
478
479 template<unsigned RegWidth>
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
481 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
482 }
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template <unsigned RegWidth>
486 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
488 }
489 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
490
491 template<unsigned RegWidth>
492 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
493 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
494 }
495
496 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
497 unsigned Width);
498
499 bool SelectCMP_SWAP(SDNode *N);
500
501 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
502 bool Negate);
503 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
504 bool Negate);
505 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
506 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
507
508 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
509 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
510 bool AllowSaturation, SDValue &Imm);
511
512 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
513 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
514 SDValue &Offset);
515 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
516 SDValue &Offset, unsigned Scale = 1);
517
518 bool SelectAllActivePredicate(SDValue N);
519 bool SelectAnyPredicate(SDValue N);
520
521 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
522
523 template <bool MatchCBB>
524 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
525};
526
527class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
528public:
529 static char ID;
530 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
531 CodeGenOptLevel OptLevel)
533 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
534};
535} // end anonymous namespace
536
537char AArch64DAGToDAGISelLegacy::ID = 0;
538
539INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
540
541/// addBitcastHints - This method adds bitcast hints to the operands of a node
542/// to help instruction selector determine which operands are in Neon registers.
544 SDLoc DL(&N);
545 auto getFloatVT = [&](EVT VT) {
546 EVT ScalarVT = VT.getScalarType();
547 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
548 return VT.changeElementType(*(DAG.getContext()),
549 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
550 };
552 NewOps.reserve(N.getNumOperands());
553
554 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
555 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
556 N.getOperand(I));
557 NewOps.push_back(bitcasted);
558 }
559 EVT OrigVT = N.getValueType(0);
560 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
561 return DAG.getBitcast(OrigVT, OpNode);
562}
563
564/// isIntImmediate - This method tests to see if the node is a constant
565/// operand. If so Imm will receive the 32-bit value.
566static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
568 Imm = C->getZExtValue();
569 return true;
570 }
571 return false;
572}
573
574// isIntImmediate - This method tests to see if a constant operand.
575// If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  // Convenience overload: unwrap the SDValue and defer to the SDNode version.
  return isIntImmediate(N.getNode(), Imm);
}
579
580// isOpcWithIntImmediate - This method tests to see if the node is a specific
581// opcode and that it has a immediate integer right operand.
582// If so Imm will receive the 32 bit value.
583static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
584 uint64_t &Imm) {
585 return N->getOpcode() == Opc &&
586 isIntImmediate(N->getOperand(1).getNode(), Imm);
587}
588
589// isIntImmediateEq - This method tests to see if N is a constant operand that
590// is equivalent to 'ImmExpected'.
591#ifndef NDEBUG
592static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
593 uint64_t Imm;
594 if (!isIntImmediate(N.getNode(), Imm))
595 return false;
596 return Imm == ImmExpected;
597}
598#endif
599
// Handle inline-asm memory constraints ("m", "o", "Q") by constraining the
// address operand to a pointer register class. Returns false when the
// operand was handled (pushed onto OutOps) — presumably the SelectionDAGISel
// convention for success; TODO confirm against the base-class contract.
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass();
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    // Wrap the operand in a COPY_TO_REGCLASS so register allocation keeps
    // the address out of XZR.
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}
624
625/// SelectArithImmed - Select an immediate value that can be represented as
626/// a 12-bit value shifted left by either 0 or 12. If so, return true with
627/// Val set to the 12-bit value and Shift set to the shifter operand.
628bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
629 SDValue &Shift) {
630 // This function is called from the addsub_shifted_imm ComplexPattern,
631 // which lists [imm] as the list of opcode it's interested in, however
632 // we still need to check whether the operand is actually an immediate
633 // here because the ComplexPattern opcode list is only used in
634 // root-level opcode matching.
635 if (!isa<ConstantSDNode>(N.getNode()))
636 return false;
637
638 uint64_t Immed = N.getNode()->getAsZExtVal();
639 unsigned ShiftAmt;
640
641 if (Immed >> 12 == 0) {
642 ShiftAmt = 0;
643 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
644 ShiftAmt = 12;
645 Immed = Immed >> 12;
646 } else
647 return false;
648
649 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
650 SDLoc dl(N);
651 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
652 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
653 return true;
654}
655
656/// SelectNegArithImmed - As above, but negates the value before trying to
657/// select it.
658bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
659 SDValue &Shift) {
660 // This function is called from the addsub_shifted_imm ComplexPattern,
661 // which lists [imm] as the list of opcode it's interested in, however
662 // we still need to check whether the operand is actually an immediate
663 // here because the ComplexPattern opcode list is only used in
664 // root-level opcode matching.
665 if (!isa<ConstantSDNode>(N.getNode()))
666 return false;
667
668 // The immediate operand must be a 24-bit zero-extended immediate.
669 uint64_t Immed = N.getNode()->getAsZExtVal();
670
671 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
672 // have the opposite effect on the C flag, so this pattern mustn't match under
673 // those circumstances.
674 if (Immed == 0)
675 return false;
676
677 if (N.getValueType() == MVT::i32)
678 Immed = ~((uint32_t)Immed) + 1;
679 else
680 Immed = ~Immed + 1ULL;
681 if (Immed & 0xFFFFFFFFFF000000ULL)
682 return false;
683
684 Immed &= 0xFFFFFFULL;
685 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
686 Shift);
687}
688
689/// getShiftTypeForNode - Translate a shift node to the corresponding
690/// ShiftType value.
692 switch (N.getOpcode()) {
693 default:
695 case ISD::SHL:
696 return AArch64_AM::LSL;
697 case ISD::SRL:
698 return AArch64_AM::LSR;
699 case ISD::SRA:
700 return AArch64_AM::ASR;
701 case ISD::ROTR:
702 return AArch64_AM::ROR;
703 }
704}
705
707 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
708}
709
710/// Determine whether it is worth it to fold SHL into the addressing
711/// mode.
713 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
714 // It is worth folding logical shift of up to three places.
715 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
716 if (!CSD)
717 return false;
718 unsigned ShiftVal = CSD->getZExtValue();
719 if (ShiftVal > 3)
720 return false;
721
722 // Check if this particular node is reused in any non-memory related
723 // operation. If yes, do not try to fold this node into the address
724 // computation, since the computation will be kept.
725 const SDNode *Node = V.getNode();
726 for (SDNode *UI : Node->users())
727 if (!isMemOpOrPrefetch(UI))
728 for (SDNode *UII : UI->users())
729 if (!isMemOpOrPrefetch(UII))
730 return false;
731 return true;
732}
733
734/// Determine whether it is worth to fold V into an extended register addressing
735/// mode.
736bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
737 // Trivial if we are optimizing for code size or if there is only
738 // one use of the value.
739 if (CurDAG->shouldOptForSize() || V.hasOneUse())
740 return true;
741
742 // If a subtarget has a slow shift, folding a shift into multiple loads
743 // costs additional micro-ops.
744 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
745 return false;
746
747 // Check whether we're going to emit the address arithmetic anyway because
748 // it's used by a non-address operation.
749 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
750 return true;
751 if (V.getOpcode() == ISD::ADD) {
752 const SDValue LHS = V.getOperand(0);
753 const SDValue RHS = V.getOperand(1);
754 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
755 return true;
756 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
757 return true;
758 }
759
760 // It hurts otherwise, since the value will be reused.
761 return false;
762}
763
764/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
765/// to select more shifted register
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  // Only 32- and 64-bit scalar integers can use a shifted-register operand.
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  // Both the AND and its shift operand must be single-use, otherwise the
  // rewrite would duplicate work.
  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  // The inner shift amount must be a constant.
  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  // The AND mask must also be a constant...
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  // ...and specifically a shifted run of ones: MaskLen consecutive one bits
  // starting LowZBits above bit zero.
  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    // For right shifts a zero LowZBits means no residual left shift remains,
    // so there is nothing for this pattern to do.
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA need all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    // SRL becomes an unsigned bitfield move, SRA a signed one.
    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  // Emit the replacement bitfield-move machine node and report the residual
  // left shift (LowZBits) as the shifted-register operand's LSL amount.
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}
841
842/// getExtendTypeForNode - Translate an extend node to the corresponding
843/// ExtendType value.
845getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
846 if (N.getOpcode() == ISD::SIGN_EXTEND ||
847 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
848 EVT SrcVT;
849 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
850 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
851 else
852 SrcVT = N.getOperand(0).getValueType();
853
854 if (!IsLoadStore && SrcVT == MVT::i8)
855 return AArch64_AM::SXTB;
856 else if (!IsLoadStore && SrcVT == MVT::i16)
857 return AArch64_AM::SXTH;
858 else if (SrcVT == MVT::i32)
859 return AArch64_AM::SXTW;
860 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
861
863 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
864 N.getOpcode() == ISD::ANY_EXTEND) {
865 EVT SrcVT = N.getOperand(0).getValueType();
866 if (!IsLoadStore && SrcVT == MVT::i8)
867 return AArch64_AM::UXTB;
868 else if (!IsLoadStore && SrcVT == MVT::i16)
869 return AArch64_AM::UXTH;
870 else if (SrcVT == MVT::i32)
871 return AArch64_AM::UXTW;
872 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
873
875 } else if (N.getOpcode() == ISD::AND) {
876 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
877 if (!CSD)
879 uint64_t AndMask = CSD->getZExtValue();
880
881 switch (AndMask) {
882 default:
884 case 0xFF:
885 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
886 case 0xFFFF:
887 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
888 case 0xFFFFFFFF:
889 return AArch64_AM::UXTW;
890 }
891 }
892
894}
895
896/// Determine whether it is worth to fold V into an extended register of an
897/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
898/// instruction, and the shift should be treated as worth folding even if has
899/// multiple uses.
900bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
901 // Trivial if we are optimizing for code size or if there is only
902 // one use of the value.
903 if (CurDAG->shouldOptForSize() || V.hasOneUse())
904 return true;
905
906 // If a subtarget has a fastpath LSL we can fold a logical shift into
907 // the add/sub and save a cycle.
908 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
909 V.getConstantOperandVal(1) <= 4 &&
911 return true;
912
913 // It hurts otherwise, since the value will be reused.
914 return false;
915}
916
917/// SelectShiftedRegister - Select a "shifted register" operand. If the value
918/// is not shifted, set the Shift operand to default of "LSL 0". The logical
919/// instructions allow the shifted register to be rotated, but the arithmetic
920/// instructions do not. The AllowROR parameter specifies whether ROR is
921/// supported.
922bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
923 SDValue &Reg, SDValue &Shift) {
924 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
925 return true;
926
928 if (ShType == AArch64_AM::InvalidShiftExtend)
929 return false;
930 if (!AllowROR && ShType == AArch64_AM::ROR)
931 return false;
932
933 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
934 unsigned BitSize = N.getValueSizeInBits();
935 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
936 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
937
938 Reg = N.getOperand(0);
939 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
940 return isWorthFoldingALU(N, true);
941 }
942
943 return false;
944}
945
946/// Instructions that accept extend modifiers like UXTW expect the register
947/// being extended to be a GPR32, but the incoming DAG might be acting on a
948/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
949/// this is the case.
951 if (N.getValueType() == MVT::i32)
952 return N;
953
954 SDLoc dl(N);
955 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
956}
957
958// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
959template<signed Low, signed High, signed Scale>
960bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
962 return false;
963
964 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
965 if ((MulImm % std::abs(Scale)) == 0) {
966 int64_t RDVLImm = MulImm / Scale;
967 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
968 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
969 return true;
970 }
971 }
972
973 return false;
974}
975
976// Returns a suitable RDSVL multiplier from a left shift.
977template <signed Low, signed High>
978bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
980 return false;
981
982 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
983 if (MulImm >= Low && MulImm <= High) {
984 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
985 return true;
986 }
987
988 return false;
989}
990
991/// SelectArithExtendedRegister - Select a "extended register" operand. This
992/// operand folds in an extend followed by an optional left shift.
993bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
994 SDValue &Shift) {
995 unsigned ShiftVal = 0;
997
998 if (N.getOpcode() == ISD::SHL) {
999 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1000 if (!CSD)
1001 return false;
1002 ShiftVal = CSD->getZExtValue();
1003 if (ShiftVal > 4)
1004 return false;
1005
1006 Ext = getExtendTypeForNode(N.getOperand(0));
1008 return false;
1009
1010 Reg = N.getOperand(0).getOperand(0);
1011 } else {
1012 Ext = getExtendTypeForNode(N);
1014 return false;
1015
1016 // Don't match sext of vector extracts. These can use SMOV, but if we match
1017 // this as an extended register, we'll always fold the extend into an ALU op
1018 // user of the extend (which results in a UMOV).
1020 SDValue Op = N.getOperand(0);
1021 if (Op->getOpcode() == ISD::ANY_EXTEND)
1022 Op = Op->getOperand(0);
1023 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1024 Op.getOperand(0).getValueType().isFixedLengthVector())
1025 return false;
1026 }
1027
1028 Reg = N.getOperand(0);
1029
1030 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1031 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1032 auto isDef32 = [](SDValue N) {
1033 unsigned Opc = N.getOpcode();
1034 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1037 Opc != ISD::FREEZE;
1038 };
1039 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1040 isDef32(Reg))
1041 return false;
1042 }
1043
1044 // AArch64 mandates that the RHS of the operation must use the smallest
1045 // register class that could contain the size being extended from. Thus,
1046 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1047 // there might not be an actual 32-bit value in the program. We can
1048 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1049 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1050 Reg = narrowIfNeeded(CurDAG, Reg);
1051 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1052 MVT::i32);
1053 return isWorthFoldingALU(N);
1054}
1055
1056/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1057/// operand is referred by the instructions have SP operand
1058bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1059 SDValue &Shift) {
1060 unsigned ShiftVal = 0;
1062
1063 if (N.getOpcode() != ISD::SHL)
1064 return false;
1065
1066 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1067 if (!CSD)
1068 return false;
1069 ShiftVal = CSD->getZExtValue();
1070 if (ShiftVal > 4)
1071 return false;
1072
1073 Ext = AArch64_AM::UXTX;
1074 Reg = N.getOperand(0);
1075 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1076 MVT::i32);
1077 return isWorthFoldingALU(N);
1078}
1079
1080/// If there's a use of this ADDlow that's not itself a load/store then we'll
1081/// need to create a real ADD instruction from it anyway and there's no point in
1082/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1083/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1084/// leads to duplicated ADRP instructions.
1086 for (auto *User : N->users()) {
1087 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1088 User->getOpcode() != ISD::ATOMIC_LOAD &&
1089 User->getOpcode() != ISD::ATOMIC_STORE)
1090 return false;
1091
1092 // ldar and stlr have much more restrictive addressing modes (just a
1093 // register).
1094 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1095 return false;
1096 }
1097
1098 return true;
1099}
1100
1101/// Check if the immediate offset is valid as a scaled immediate.
1102static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1103 unsigned Size) {
1104 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1105 Offset < (Range << Log2_32(Size)))
1106 return true;
1107 return false;
1108}
1109
1110/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1111/// immediate" address. The "Size" argument is the size in bytes of the memory
1112/// reference, which determines the scale.
1113bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1114 unsigned BW, unsigned Size,
1115 SDValue &Base,
1116 SDValue &OffImm) {
1117 SDLoc dl(N);
1118 const DataLayout &DL = CurDAG->getDataLayout();
1119 const TargetLowering *TLI = getTargetLowering();
1120 if (N.getOpcode() == ISD::FrameIndex) {
1121 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1122 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1123 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1124 return true;
1125 }
1126
1127 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1128 // selected here doesn't support labels/immediates, only base+offset.
1129 if (CurDAG->isBaseWithConstantOffset(N)) {
1130 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1131 if (IsSignedImm) {
1132 int64_t RHSC = RHS->getSExtValue();
1133 unsigned Scale = Log2_32(Size);
1134 int64_t Range = 0x1LL << (BW - 1);
1135
1136 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1137 RHSC < (Range << Scale)) {
1138 Base = N.getOperand(0);
1139 if (Base.getOpcode() == ISD::FrameIndex) {
1140 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1141 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1142 }
1143 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1144 return true;
1145 }
1146 } else {
1147 // unsigned Immediate
1148 uint64_t RHSC = RHS->getZExtValue();
1149 unsigned Scale = Log2_32(Size);
1150 uint64_t Range = 0x1ULL << BW;
1151
1152 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1153 Base = N.getOperand(0);
1154 if (Base.getOpcode() == ISD::FrameIndex) {
1155 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1156 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1157 }
1158 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1159 return true;
1160 }
1161 }
1162 }
1163 }
1164 // Base only. The address will be materialized into a register before
1165 // the memory is accessed.
1166 // add x0, Xbase, #offset
1167 // stp x1, x2, [x0]
1168 Base = N;
1169 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1170 return true;
1171}
1172
1173/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1174/// immediate" address. The "Size" argument is the size in bytes of the memory
1175/// reference, which determines the scale.
1176bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1177 SDValue &Base, SDValue &OffImm) {
1178 SDLoc dl(N);
1179 const DataLayout &DL = CurDAG->getDataLayout();
1180 const TargetLowering *TLI = getTargetLowering();
1181 if (N.getOpcode() == ISD::FrameIndex) {
1182 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1183 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1184 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1185 return true;
1186 }
1187
1188 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1189 GlobalAddressSDNode *GAN =
1190 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1191 Base = N.getOperand(0);
1192 OffImm = N.getOperand(1);
1193 if (!GAN)
1194 return true;
1195
1196 if (GAN->getOffset() % Size == 0 &&
1198 return true;
1199 }
1200
1201 if (CurDAG->isBaseWithConstantOffset(N)) {
1202 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1203 int64_t RHSC = (int64_t)RHS->getZExtValue();
1204 unsigned Scale = Log2_32(Size);
1205 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1206 Base = N.getOperand(0);
1207 if (Base.getOpcode() == ISD::FrameIndex) {
1208 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1209 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1210 }
1211 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1212 return true;
1213 }
1214 }
1215 }
1216
1217 // Before falling back to our general case, check if the unscaled
1218 // instructions can handle this. If so, that's preferable.
1219 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1220 return false;
1221
1222 // Base only. The address will be materialized into a register before
1223 // the memory is accessed.
1224 // add x0, Xbase, #offset
1225 // ldr x0, [x0]
1226 Base = N;
1227 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1228 return true;
1229}
1230
1231/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1232/// immediate" address. This should only match when there is an offset that
1233/// is not valid for a scaled immediate addressing mode. The "Size" argument
1234/// is the size in bytes of the memory reference, which is needed here to know
1235/// what is valid for a scaled immediate.
1236bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1237 SDValue &Base,
1238 SDValue &OffImm) {
1239 if (!CurDAG->isBaseWithConstantOffset(N))
1240 return false;
1241 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1242 int64_t RHSC = RHS->getSExtValue();
1243 if (RHSC >= -256 && RHSC < 256) {
1244 Base = N.getOperand(0);
1245 if (Base.getOpcode() == ISD::FrameIndex) {
1246 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1247 const TargetLowering *TLI = getTargetLowering();
1248 Base = CurDAG->getTargetFrameIndex(
1249 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1250 }
1251 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1252 return true;
1253 }
1254 }
1255 return false;
1256}
1257
1259 SDLoc dl(N);
1260 SDValue ImpDef = SDValue(
1261 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1262 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1263 N);
1264}
1265
1266/// Check if the given SHL node (\p N), can be used to form an
1267/// extended register for an addressing mode.
1268bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1269 bool WantExtend, SDValue &Offset,
1270 SDValue &SignExtend) {
1271 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1272 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1273 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1274 return false;
1275
1276 SDLoc dl(N);
1277 if (WantExtend) {
1279 getExtendTypeForNode(N.getOperand(0), true);
1281 return false;
1282
1283 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1284 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1285 MVT::i32);
1286 } else {
1287 Offset = N.getOperand(0);
1288 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1289 }
1290
1291 unsigned LegalShiftVal = Log2_32(Size);
1292 unsigned ShiftVal = CSD->getZExtValue();
1293
1294 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1295 return false;
1296
1297 return isWorthFoldingAddr(N, Size);
1298}
1299
1300bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1302 SDValue &SignExtend,
1303 SDValue &DoShift) {
1304 if (N.getOpcode() != ISD::ADD)
1305 return false;
1306 SDValue LHS = N.getOperand(0);
1307 SDValue RHS = N.getOperand(1);
1308 SDLoc dl(N);
1309
1310 // We don't want to match immediate adds here, because they are better lowered
1311 // to the register-immediate addressing modes.
1313 return false;
1314
1315 // Check if this particular node is reused in any non-memory related
1316 // operation. If yes, do not try to fold this node into the address
1317 // computation, since the computation will be kept.
1318 const SDNode *Node = N.getNode();
1319 for (SDNode *UI : Node->users()) {
1320 if (!isMemOpOrPrefetch(UI))
1321 return false;
1322 }
1323
1324 // Remember if it is worth folding N when it produces extended register.
1325 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1326
1327 // Try to match a shifted extend on the RHS.
1328 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1329 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1330 Base = LHS;
1331 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1332 return true;
1333 }
1334
1335 // Try to match a shifted extend on the LHS.
1336 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1337 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1338 Base = RHS;
1339 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1340 return true;
1341 }
1342
1343 // There was no shift, whatever else we find.
1344 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1345
1347 // Try to match an unshifted extend on the LHS.
1348 if (IsExtendedRegisterWorthFolding &&
1349 (Ext = getExtendTypeForNode(LHS, true)) !=
1351 Base = RHS;
1352 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1353 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1354 MVT::i32);
1355 if (isWorthFoldingAddr(LHS, Size))
1356 return true;
1357 }
1358
1359 // Try to match an unshifted extend on the RHS.
1360 if (IsExtendedRegisterWorthFolding &&
1361 (Ext = getExtendTypeForNode(RHS, true)) !=
1363 Base = LHS;
1364 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1365 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1366 MVT::i32);
1367 if (isWorthFoldingAddr(RHS, Size))
1368 return true;
1369 }
1370
1371 return false;
1372}
1373
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Anything in [0x0, 0xfff] fits ADD's 12-bit unsigned immediate.
  if ((ImmOff & ~0xfffLL) == 0)
    return true;
  // Otherwise the constant must be a 12-bit immediate shifted left by 12 to
  // be a candidate for "ADD ..., LSL #12".
  if ((ImmOff & ~0xfff000LL) != 0)
    return false;
  // Prefer a single MOVZ over ADD-of-LSL#12 when the constant is one 16-bit
  // chunk: bits confined to [16,23] fit MOVZ LSL #16, bits confined to
  // [12,15] fit MOVZ LSL #0.
  bool FitsMovzLsl16 = (ImmOff & 0xffffffffff00ffffLL) == 0;
  bool FitsMovzLsl0 = (ImmOff & 0xffffffffffff0fffLL) == 0;
  return !FitsMovzLsl16 && !FitsMovzLsl0;
}
1388
1389bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1391 SDValue &SignExtend,
1392 SDValue &DoShift) {
1393 if (N.getOpcode() != ISD::ADD)
1394 return false;
1395 SDValue LHS = N.getOperand(0);
1396 SDValue RHS = N.getOperand(1);
1397 SDLoc DL(N);
1398
1399 // Check if this particular node is reused in any non-memory related
1400 // operation. If yes, do not try to fold this node into the address
1401 // computation, since the computation will be kept.
1402 const SDNode *Node = N.getNode();
1403 for (SDNode *UI : Node->users()) {
1404 if (!isMemOpOrPrefetch(UI))
1405 return false;
1406 }
1407
1408 // Watch out if RHS is a wide immediate, it can not be selected into
1409 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1410 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1411 // instructions like:
1412 // MOV X0, WideImmediate
1413 // ADD X1, BaseReg, X0
1414 // LDR X2, [X1, 0]
1415 // For such situation, using [BaseReg, XReg] addressing mode can save one
1416 // ADD/SUB:
1417 // MOV X0, WideImmediate
1418 // LDR X2, [BaseReg, X0]
1419 if (isa<ConstantSDNode>(RHS)) {
1420 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1421 // Skip the immediate can be selected by load/store addressing mode.
1422 // Also skip the immediate can be encoded by a single ADD (SUB is also
1423 // checked by using -ImmOff).
1424 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1425 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1426 return false;
1427
1428 SDValue Ops[] = { RHS };
1429 SDNode *MOVI =
1430 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1431 SDValue MOVIV = SDValue(MOVI, 0);
1432 // This ADD of two X register will be selected into [Reg+Reg] mode.
1433 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1434 }
1435
1436 // Remember if it is worth folding N when it produces extended register.
1437 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1438
1439 // Try to match a shifted extend on the RHS.
1440 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1441 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1442 Base = LHS;
1443 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1444 return true;
1445 }
1446
1447 // Try to match a shifted extend on the LHS.
1448 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1449 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1450 Base = RHS;
1451 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1452 return true;
1453 }
1454
1455 // Match any non-shifted, non-extend, non-immediate add expression.
1456 Base = LHS;
1457 Offset = RHS;
1458 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1459 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1460 // Reg1 + Reg2 is free: no check needed.
1461 return true;
1462}
1463
1464SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1465 static const unsigned RegClassIDs[] = {
1466 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1467 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1468 AArch64::dsub2, AArch64::dsub3};
1469
1470 return createTuple(Regs, RegClassIDs, SubRegs);
1471}
1472
1473SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1474 static const unsigned RegClassIDs[] = {
1475 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1476 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1477 AArch64::qsub2, AArch64::qsub3};
1478
1479 return createTuple(Regs, RegClassIDs, SubRegs);
1480}
1481
1482SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1483 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1484 AArch64::ZPR3RegClassID,
1485 AArch64::ZPR4RegClassID};
1486 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1487 AArch64::zsub2, AArch64::zsub3};
1488
1489 return createTuple(Regs, RegClassIDs, SubRegs);
1490}
1491
1492SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1493 assert(Regs.size() == 2 || Regs.size() == 4);
1494
1495 // The createTuple interface requires 3 RegClassIDs for each possible
1496 // tuple type even though we only have them for ZPR2 and ZPR4.
1497 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1498 AArch64::ZPR4Mul4RegClassID};
1499 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1500 AArch64::zsub2, AArch64::zsub3};
1501 return createTuple(Regs, RegClassIDs, SubRegs);
1502}
1503
1504SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1505 const unsigned RegClassIDs[],
1506 const unsigned SubRegs[]) {
1507 // There's no special register-class for a vector-list of 1 element: it's just
1508 // a vector.
1509 if (Regs.size() == 1)
1510 return Regs[0];
1511
1512 assert(Regs.size() >= 2 && Regs.size() <= 4);
1513
1514 SDLoc DL(Regs[0]);
1515
1517
1518 // First operand of REG_SEQUENCE is the desired RegClass.
1519 Ops.push_back(
1520 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1521
1522 // Then we get pairs of source & subregister-position for the components.
1523 for (unsigned i = 0; i < Regs.size(); ++i) {
1524 Ops.push_back(Regs[i]);
1525 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1526 }
1527
1528 SDNode *N =
1529 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1530 return SDValue(N, 0);
1531}
1532
1533void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1534 bool isExt) {
1535 SDLoc dl(N);
1536 EVT VT = N->getValueType(0);
1537
1538 unsigned ExtOff = isExt;
1539
1540 // Form a REG_SEQUENCE to force register allocation.
1541 unsigned Vec0Off = ExtOff + 1;
1542 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1543 SDValue RegSeq = createQTuple(Regs);
1544
1546 if (isExt)
1547 Ops.push_back(N->getOperand(1));
1548 Ops.push_back(RegSeq);
1549 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1550 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1551}
1552
1553static std::tuple<SDValue, SDValue>
1555 SDLoc DL(Disc);
1556 SDValue AddrDisc;
1557 SDValue ConstDisc;
1558
1559 // If this is a blend, remember the constant and address discriminators.
1560 // Otherwise, it's either a constant discriminator, or a non-blended
1561 // address discriminator.
1562 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1563 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1564 AddrDisc = Disc->getOperand(1);
1565 ConstDisc = Disc->getOperand(2);
1566 } else {
1567 ConstDisc = Disc;
1568 }
1569
1570 // If the constant discriminator (either the blend RHS, or the entire
1571 // discriminator value) isn't a 16-bit constant, bail out, and let the
1572 // discriminator be computed separately.
1573 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1574 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1575 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1576
1577 // If there's no address discriminator, use XZR directly.
1578 if (!AddrDisc)
1579 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1580
1581 return std::make_tuple(
1582 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1583 AddrDisc);
1584}
1585
/// Select an @llvm.ptrauth.auth intrinsic into an AUT pseudo-instruction.
/// The discriminator is split into its constant and address parts first.
void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);

  // The key is always a compile-time constant; rewrap it as a target constant.
  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  if (!Subtarget->isX16X17Safer()) {
    std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
    // Copy deactivation symbol if present.
    if (N->getNumOperands() > 4)
      Ops.push_back(N->getOperand(4));

    SDNode *AUT =
        CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
    ReplaceNode(N, AUT);
  } else {
    // Pin the value being authenticated into X16 and thread the copy's glue
    // into the pseudo, which reads the pointer from that register.
    SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           AArch64::X16, Val, SDValue());
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};

    SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
    ReplaceNode(N, AUT);
  }
}
1618
/// Select an @llvm.ptrauth.resign (or resign_load_relative) intrinsic into
/// the AUTPAC / AUTRELLOADPAC pseudo, which authenticates a pointer with one
/// key/discriminator and re-signs it with another.
void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0, if W_CHAIN it is #1
  int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
  SDValue Val = N->getOperand(OffsetBase + 1);
  SDValue AUTKey = N->getOperand(OffsetBase + 2);
  SDValue AUTDisc = N->getOperand(OffsetBase + 3);
  SDValue PACKey = N->getOperand(OffsetBase + 4);
  SDValue PACDisc = N->getOperand(OffsetBase + 5);
  uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
  bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;

  // Both keys are compile-time constants; rewrap them as target constants.
  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
  PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);

  // Split each discriminator into its constant and address-register parts.
  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(PACConstDisc, PACAddrDisc) =
      extractPtrauthBlendDiscriminators(PACDisc, CurDAG);

  // Pin the incoming pointer into X16 and thread the copy's glue into the
  // pseudo, which reads the pointer from that register.
  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());

  if (HasLoad) {
    // The load-relative variant also carries an addend and the memory chain.
    SDValue Addend = N->getOperand(OffsetBase + 6);
    SDValue IncomingChain = N->getOperand(0);
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
                     PACKey, PACConstDisc, PACAddrDisc,
                     Addend, IncomingChain, X16Copy.getValue(1)};

    SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
                                                   MVT::i64, MVT::Other, Ops);
    ReplaceNode(N, AUTRELLOADPAC);
  } else {
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
                     PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};

    SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
    ReplaceNode(N, AUTPAC);
  }
}
1666
/// Try to select a pre/post-indexed load into the corresponding AArch64
/// writeback load instruction.
///
/// Returns true and replaces \p N when a matching instruction exists for the
/// load's memory type, extension kind and addressing mode; returns false to
/// fall back to normal selection otherwise.
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  // Pick the opcode from the memory type and (for integers) the extension
  // kind; InsertTo64 marks the cases where the i32 result must be placed in
  // the low half of an i64 via SUBREG_TO_REG afterwards.
  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 ||
             (VT.is64BitVector() && Subtarget->isLittleEndian())) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else if (VT.is64BitVector()) {
    // Remaining (big-endian) 64-bit vectors: only a post-increment by the
    // access size can be selected, using LD1's register-writeback form.
    if (IsPre || OffsetVal != 8)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev8b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev4h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev2s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev1d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else if (VT.is128BitVector()) {
    // Same for the remaining 128-bit vectors.
    if (IsPre || OffsetVal != 16)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev16b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev8h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev4s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev2d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDLoc dl(N);
  // LD1 encodes an immediate offset by using XZR as the offset register.
  SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
                       ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
                       : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    // Promote the i32 result into the low half of an i64 register.
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
                                               MVT::i64, LoadedVal, SubReg),
                        0);
  }

  // Result 0 is the loaded value, result 1 the updated base register,
  // result 2 the chain.
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1806
1807void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1808 unsigned SubRegIdx) {
1809 SDLoc dl(N);
1810 EVT VT = N->getValueType(0);
1811 SDValue Chain = N->getOperand(0);
1812
1813 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1814 Chain};
1815
1816 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1817
1818 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1819 SDValue SuperReg = SDValue(Ld, 0);
1820 for (unsigned i = 0; i < NumVecs; ++i)
1821 ReplaceUses(SDValue(N, i),
1822 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1823
1824 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1825
1826 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1827 // because it's too simple to have needed special treatment during lowering.
1828 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1829 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1830 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1831 }
1832
1833 CurDAG->RemoveDeadNode(N);
1834}
1835
1836void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1837 unsigned Opc, unsigned SubRegIdx) {
1838 SDLoc dl(N);
1839 EVT VT = N->getValueType(0);
1840 SDValue Chain = N->getOperand(0);
1841
1842 SDValue Ops[] = {N->getOperand(1), // Mem operand
1843 N->getOperand(2), // Incremental
1844 Chain};
1845
1846 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1847 MVT::Untyped, MVT::Other};
1848
1849 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1850
1851 // Update uses of write back register
1852 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1853
1854 // Update uses of vector list
1855 SDValue SuperReg = SDValue(Ld, 1);
1856 if (NumVecs == 1)
1857 ReplaceUses(SDValue(N, 0), SuperReg);
1858 else
1859 for (unsigned i = 0; i < NumVecs; ++i)
1860 ReplaceUses(SDValue(N, i),
1861 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1862
1863 // Update the chain
1864 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1865 CurDAG->RemoveDeadNode(N);
1866}
1867
1868/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1869/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1870/// new Base and an SDValue representing the new offset.
1871std::tuple<unsigned, SDValue, SDValue>
1872AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1873 unsigned Opc_ri,
1874 const SDValue &OldBase,
1875 const SDValue &OldOffset,
1876 unsigned Scale) {
1877 SDValue NewBase = OldBase;
1878 SDValue NewOffset = OldOffset;
1879 // Detect a possible Reg+Imm addressing mode.
1880 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1881 N, OldBase, NewBase, NewOffset);
1882
1883 // Detect a possible reg+reg addressing mode, but only if we haven't already
1884 // detected a Reg+Imm one.
1885 const bool IsRegReg =
1886 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1887
1888 // Select the instruction.
1889 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1890}
1891
/// Element-type constraint used by SelectOpcodeFromVT when picking a
/// per-element-size opcode for a scalable vector type.
enum class SelectTypeKind {
  Int1 = 0,    // i1 predicate element types only.
  Int = 1,     // Integer element types (i8/i16/i32/i64).
  FP = 2,      // Floating-point element types (f16/bf16/f32/f64).
  AnyType = 3, // No constraint on the element type.
};
1898
1899/// This function selects an opcode from a list of opcodes, which is
1900/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1901/// element types, in this order.
1902template <SelectTypeKind Kind>
1903static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1904 // Only match scalable vector VTs
1905 if (!VT.isScalableVector())
1906 return 0;
1907
1908 EVT EltVT = VT.getVectorElementType();
1909 unsigned Key = VT.getVectorMinNumElements();
1910 switch (Kind) {
1912 break;
1914 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1915 EltVT != MVT::i64)
1916 return 0;
1917 break;
1919 if (EltVT != MVT::i1)
1920 return 0;
1921 break;
1922 case SelectTypeKind::FP:
1923 if (EltVT == MVT::bf16)
1924 Key = 16;
1925 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1926 EltVT != MVT::f64)
1927 return 0;
1928 break;
1929 }
1930
1931 unsigned Offset;
1932 switch (Key) {
1933 case 16: // 8-bit or bf16
1934 Offset = 0;
1935 break;
1936 case 8: // 16-bit
1937 Offset = 1;
1938 break;
1939 case 4: // 32-bit
1940 Offset = 2;
1941 break;
1942 case 2: // 64-bit
1943 Offset = 3;
1944 break;
1945 default:
1946 return 0;
1947 }
1948
1949 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1950}
1951
1952// This function is almost identical to SelectWhilePair, but has an
1953// extra check on the range of the immediate operand.
1954// TODO: Merge these two functions together at some point?
1955void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1956 // Immediate can be either 0 or 1.
1957 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1958 if (Imm->getZExtValue() > 1)
1959 return;
1960
1961 SDLoc DL(N);
1962 EVT VT = N->getValueType(0);
1963 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1964 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1965 SDValue SuperReg = SDValue(WhilePair, 0);
1966
1967 for (unsigned I = 0; I < 2; ++I)
1968 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1969 AArch64::psub0 + I, DL, VT, SuperReg));
1970
1971 CurDAG->RemoveDeadNode(N);
1972}
1973
1974void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1975 SDLoc DL(N);
1976 EVT VT = N->getValueType(0);
1977
1978 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1979
1980 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1981 SDValue SuperReg = SDValue(WhilePair, 0);
1982
1983 for (unsigned I = 0; I < 2; ++I)
1984 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1985 AArch64::psub0 + I, DL, VT, SuperReg));
1986
1987 CurDAG->RemoveDeadNode(N);
1988}
1989
1990void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1991 unsigned Opcode) {
1992 EVT VT = N->getValueType(0);
1993 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1994 SDValue Ops = createZTuple(Regs);
1995 SDLoc DL(N);
1996 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1997 SDValue SuperReg = SDValue(Intrinsic, 0);
1998 for (unsigned i = 0; i < NumVecs; ++i)
1999 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2000 AArch64::zsub0 + i, DL, VT, SuperReg));
2001
2002 CurDAG->RemoveDeadNode(N);
2003}
2004
2005void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2006 unsigned Opcode) {
2007 SDLoc DL(N);
2008 EVT VT = N->getValueType(0);
2009 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2010 Ops.push_back(/*Chain*/ N->getOperand(0));
2011
2012 SDNode *Instruction =
2013 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2014 SDValue SuperReg = SDValue(Instruction, 0);
2015
2016 for (unsigned i = 0; i < NumVecs; ++i)
2017 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2018 AArch64::zsub0 + i, DL, VT, SuperReg));
2019
2020 // Copy chain
2021 unsigned ChainIdx = NumVecs;
2022 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2023 CurDAG->RemoveDeadNode(N);
2024}
2025
2026void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2027 unsigned NumVecs,
2028 bool IsZmMulti,
2029 unsigned Opcode,
2030 bool HasPred) {
2031 assert(Opcode != 0 && "Unexpected opcode");
2032
2033 SDLoc DL(N);
2034 EVT VT = N->getValueType(0);
2035 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2037
2038 auto GetMultiVecOperand = [&]() {
2039 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2040 OpsIter += NumVecs;
2041 return createZMulTuple(Regs);
2042 };
2043
2044 if (HasPred)
2045 Ops.push_back(*OpsIter++);
2046
2047 Ops.push_back(GetMultiVecOperand());
2048 if (IsZmMulti)
2049 Ops.push_back(GetMultiVecOperand());
2050 else
2051 Ops.push_back(*OpsIter++);
2052
2053 // Append any remaining operands.
2054 Ops.append(OpsIter, N->op_end());
2055 SDNode *Intrinsic;
2056 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2057 SDValue SuperReg = SDValue(Intrinsic, 0);
2058 for (unsigned i = 0; i < NumVecs; ++i)
2059 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2060 AArch64::zsub0 + i, DL, VT, SuperReg));
2061
2062 CurDAG->RemoveDeadNode(N);
2063}
2064
2065void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2066 unsigned Scale, unsigned Opc_ri,
2067 unsigned Opc_rr, bool IsIntr) {
2068 assert(Scale < 5 && "Invalid scaling value.");
2069 SDLoc DL(N);
2070 EVT VT = N->getValueType(0);
2071 SDValue Chain = N->getOperand(0);
2072
2073 // Optimize addressing mode.
2075 unsigned Opc;
2076 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2077 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2078 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2079
2080 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2081 Base, // Memory operand
2082 Offset, Chain};
2083
2084 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2085
2086 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2087 SDValue SuperReg = SDValue(Load, 0);
2088 for (unsigned i = 0; i < NumVecs; ++i)
2089 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2090 AArch64::zsub0 + i, DL, VT, SuperReg));
2091
2092 // Copy chain
2093 unsigned ChainIdx = NumVecs;
2094 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2095 CurDAG->RemoveDeadNode(N);
2096}
2097
2098void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2099 unsigned NumVecs,
2100 unsigned Scale,
2101 unsigned Opc_ri,
2102 unsigned Opc_rr) {
2103 assert(Scale < 4 && "Invalid scaling value.");
2104 SDLoc DL(N);
2105 EVT VT = N->getValueType(0);
2106 SDValue Chain = N->getOperand(0);
2107
2108 SDValue PNg = N->getOperand(2);
2109 SDValue Base = N->getOperand(3);
2110 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2111 unsigned Opc;
2112 std::tie(Opc, Base, Offset) =
2113 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2114
2115 SDValue Ops[] = {PNg, // Predicate-as-counter
2116 Base, // Memory operand
2117 Offset, Chain};
2118
2119 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2120
2121 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2122 SDValue SuperReg = SDValue(Load, 0);
2123 for (unsigned i = 0; i < NumVecs; ++i)
2124 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2125 AArch64::zsub0 + i, DL, VT, SuperReg));
2126
2127 // Copy chain
2128 unsigned ChainIdx = NumVecs;
2129 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2130 CurDAG->RemoveDeadNode(N);
2131}
2132
2133void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2134 unsigned Opcode) {
2135 if (N->getValueType(0) != MVT::nxv4f32)
2136 return;
2137 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2138}
2139
2140void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2141 unsigned NumOutVecs,
2142 unsigned Opc,
2143 uint32_t MaxImm) {
2144 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2145 if (Imm->getZExtValue() > MaxImm)
2146 return;
2147
2148 SDValue ZtValue;
2149 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2150 return;
2151
2152 SDValue Chain = Node->getOperand(0);
2153 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2154 SDLoc DL(Node);
2155 EVT VT = Node->getValueType(0);
2156
2157 SDNode *Instruction =
2158 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2159 SDValue SuperReg = SDValue(Instruction, 0);
2160
2161 for (unsigned I = 0; I < NumOutVecs; ++I)
2162 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2163 AArch64::zsub0 + I, DL, VT, SuperReg));
2164
2165 // Copy chain
2166 unsigned ChainIdx = NumOutVecs;
2167 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2168 CurDAG->RemoveDeadNode(Node);
2169}
2170
2171void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2172 unsigned NumOutVecs,
2173 unsigned Opc) {
2174 SDValue ZtValue;
2175 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2176 return;
2177
2178 SDValue Chain = Node->getOperand(0);
2179 SDValue Ops[] = {ZtValue,
2180 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2181 Chain};
2182
2183 SDLoc DL(Node);
2184 EVT VT = Node->getValueType(0);
2185
2186 SDNode *Instruction =
2187 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2188 SDValue SuperReg = SDValue(Instruction, 0);
2189
2190 for (unsigned I = 0; I < NumOutVecs; ++I)
2191 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2192 AArch64::zsub0 + I, DL, VT, SuperReg));
2193
2194 // Copy chain
2195 unsigned ChainIdx = NumOutVecs;
2196 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2197 CurDAG->RemoveDeadNode(Node);
2198}
2199
2200void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2201 unsigned Op) {
2202 SDLoc DL(N);
2203 EVT VT = N->getValueType(0);
2204
2205 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2206 SDValue Zd = createZMulTuple(Regs);
2207 SDValue Zn = N->getOperand(1 + NumVecs);
2208 SDValue Zm = N->getOperand(2 + NumVecs);
2209
2210 SDValue Ops[] = {Zd, Zn, Zm};
2211
2212 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2213 SDValue SuperReg = SDValue(Intrinsic, 0);
2214 for (unsigned i = 0; i < NumVecs; ++i)
2215 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2216 AArch64::zsub0 + i, DL, VT, SuperReg));
2217
2218 CurDAG->RemoveDeadNode(N);
2219}
2220
2221bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2222 switch (BaseReg) {
2223 default:
2224 return false;
2225 case AArch64::ZA:
2226 case AArch64::ZAB0:
2227 if (TileNum == 0)
2228 break;
2229 return false;
2230 case AArch64::ZAH0:
2231 if (TileNum <= 1)
2232 break;
2233 return false;
2234 case AArch64::ZAS0:
2235 if (TileNum <= 3)
2236 break;
2237 return false;
2238 case AArch64::ZAD0:
2239 if (TileNum <= 7)
2240 break;
2241 return false;
2242 }
2243
2244 BaseReg += TileNum;
2245 return true;
2246}
2247
2248template <unsigned MaxIdx, unsigned Scale>
2249void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2250 unsigned BaseReg, unsigned Op) {
2251 unsigned TileNum = 0;
2252 if (BaseReg != AArch64::ZA)
2253 TileNum = N->getConstantOperandVal(2);
2254
2255 if (!SelectSMETile(BaseReg, TileNum))
2256 return;
2257
2258 SDValue SliceBase, Base, Offset;
2259 if (BaseReg == AArch64::ZA)
2260 SliceBase = N->getOperand(2);
2261 else
2262 SliceBase = N->getOperand(3);
2263
2264 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2265 return;
2266
2267 SDLoc DL(N);
2268 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2269 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2270 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2271
2272 EVT VT = N->getValueType(0);
2273 for (unsigned I = 0; I < NumVecs; ++I)
2274 ReplaceUses(SDValue(N, I),
2275 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2276 SDValue(Mov, 0)));
2277 // Copy chain
2278 unsigned ChainIdx = NumVecs;
2279 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2280 CurDAG->RemoveDeadNode(N);
2281}
2282
2283void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2284 unsigned Op, unsigned MaxIdx,
2285 unsigned Scale, unsigned BaseReg) {
2286 // Slice can be in different positions
2287 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2288 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2289 SDValue SliceBase = N->getOperand(2);
2290 if (BaseReg != AArch64::ZA)
2291 SliceBase = N->getOperand(3);
2292
2294 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2295 return;
2296 // The correct Za tile number is computed in Machine Instruction
2297 // See EmitZAInstr
2298 // DAG cannot select Za tile as an output register with ZReg
2299 SDLoc DL(N);
2301 if (BaseReg != AArch64::ZA )
2302 Ops.push_back(N->getOperand(2));
2303 Ops.push_back(Base);
2304 Ops.push_back(Offset);
2305 Ops.push_back(N->getOperand(0)); //Chain
2306 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2307
2308 EVT VT = N->getValueType(0);
2309 for (unsigned I = 0; I < NumVecs; ++I)
2310 ReplaceUses(SDValue(N, I),
2311 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2312 SDValue(Mov, 0)));
2313
2314 // Copy chain
2315 unsigned ChainIdx = NumVecs;
2316 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2317 CurDAG->RemoveDeadNode(N);
2318}
2319
2320void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2321 unsigned NumOutVecs,
2322 bool IsTupleInput,
2323 unsigned Opc) {
2324 SDLoc DL(N);
2325 EVT VT = N->getValueType(0);
2326 unsigned NumInVecs = N->getNumOperands() - 1;
2327
2329 if (IsTupleInput) {
2330 assert((NumInVecs == 2 || NumInVecs == 4) &&
2331 "Don't know how to handle multi-register input!");
2332 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2333 Ops.push_back(createZMulTuple(Regs));
2334 } else {
2335 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2336 for (unsigned I = 0; I < NumInVecs; I++)
2337 Ops.push_back(N->getOperand(1 + I));
2338 }
2339
2340 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2341 SDValue SuperReg = SDValue(Res, 0);
2342
2343 for (unsigned I = 0; I < NumOutVecs; I++)
2344 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2345 AArch64::zsub0 + I, DL, VT, SuperReg));
2346 CurDAG->RemoveDeadNode(N);
2347}
2348
2349void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2350 unsigned Opc) {
2351 SDLoc dl(N);
2352 EVT VT = N->getOperand(2)->getValueType(0);
2353
2354 // Form a REG_SEQUENCE to force register allocation.
2355 bool Is128Bit = VT.getSizeInBits() == 128;
2356 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2357 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2358
2359 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2360 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2361
2362 // Transfer memoperands.
2363 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2364 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2365
2366 ReplaceNode(N, St);
2367}
2368
2369void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2370 unsigned Scale, unsigned Opc_rr,
2371 unsigned Opc_ri) {
2372 SDLoc dl(N);
2373
2374 // Form a REG_SEQUENCE to force register allocation.
2375 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2376 SDValue RegSeq = createZTuple(Regs);
2377
2378 // Optimize addressing mode.
2379 unsigned Opc;
2381 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2382 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2383 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2384
2385 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2386 Base, // address
2387 Offset, // offset
2388 N->getOperand(0)}; // chain
2389 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2390
2391 ReplaceNode(N, St);
2392}
2393
2394bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2395 SDValue &OffImm) {
2396 SDLoc dl(N);
2397 const DataLayout &DL = CurDAG->getDataLayout();
2398 const TargetLowering *TLI = getTargetLowering();
2399
2400 // Try to match it for the frame address
2401 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2402 int FI = FINode->getIndex();
2403 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2404 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2405 return true;
2406 }
2407
2408 return false;
2409}
2410
2411void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2412 unsigned Opc) {
2413 SDLoc dl(N);
2414 EVT VT = N->getOperand(2)->getValueType(0);
2415 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2416 MVT::Other}; // Type for the Chain
2417
2418 // Form a REG_SEQUENCE to force register allocation.
2419 bool Is128Bit = VT.getSizeInBits() == 128;
2420 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2421 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2422
2423 SDValue Ops[] = {RegSeq,
2424 N->getOperand(NumVecs + 1), // base register
2425 N->getOperand(NumVecs + 2), // Incremental
2426 N->getOperand(0)}; // Chain
2427 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2428
2429 ReplaceNode(N, St);
2430}
2431
2432namespace {
2433/// WidenVector - Given a value in the V64 register class, produce the
2434/// equivalent value in the V128 register class.
2435class WidenVector {
2436 SelectionDAG &DAG;
2437
2438public:
2439 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2440
2441 SDValue operator()(SDValue V64Reg) {
2442 EVT VT = V64Reg.getValueType();
2443 unsigned NarrowSize = VT.getVectorNumElements();
2444 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2445 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2446 SDLoc DL(V64Reg);
2447
2448 SDValue Undef =
2449 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2450 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2451 }
2452};
2453} // namespace
2454
2455/// NarrowVector - Given a value in the V128 register class, produce the
2456/// equivalent value in the V64 register class.
2458 EVT VT = V128Reg.getValueType();
2459 unsigned WideSize = VT.getVectorNumElements();
2460 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2461 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2462
2463 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2464 V128Reg);
2465}
2466
/// Select a single-lane structured load. The NumVecs input vectors are
/// packed into a Q-register tuple, the lane is loaded, and the updated
/// vectors are extracted back out for the original node's users.
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit input vectors must be widened to 128 bits first, since the lane
  // form of the instruction operates on Q registers.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  // Operands: vector tuple, lane number, address, chain.
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Extract each result vector from the tuple, narrowing back to 64 bits
  // where the original inputs were 64-bit.
  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  // The chain result follows the vector results.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}
2504
/// Select a post-indexed single-lane structured load. The machine node
/// produces the write-back address (i64), the updated vector tuple, and a
/// chain; the tuple is split back into NumVecs vectors for the users.
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit input vectors must be widened to 128 bits first, since the lane
  // form of the instruction operates on Q registers.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2), // Base register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // A single vector is the tuple itself; narrow back down if the original
    // type was 64-bit.
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    // Extract each Q-subregister out of the tuple result, narrowing where
    // the original inputs were 64-bit.
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
2558
2559void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2560 unsigned Opc) {
2561 SDLoc dl(N);
2562 EVT VT = N->getOperand(2)->getValueType(0);
2563 bool Narrow = VT.getSizeInBits() == 64;
2564
2565 // Form a REG_SEQUENCE to force register allocation.
2566 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2567
2568 if (Narrow)
2569 transform(Regs, Regs.begin(),
2570 WidenVector(*CurDAG));
2571
2572 SDValue RegSeq = createQTuple(Regs);
2573
2574 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2575
2576 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2577 N->getOperand(NumVecs + 3), N->getOperand(0)};
2578 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2579
2580 // Transfer memoperands.
2581 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2582 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2583
2584 ReplaceNode(N, St);
2585}
2586
2587void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2588 unsigned Opc) {
2589 SDLoc dl(N);
2590 EVT VT = N->getOperand(2)->getValueType(0);
2591 bool Narrow = VT.getSizeInBits() == 64;
2592
2593 // Form a REG_SEQUENCE to force register allocation.
2594 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2595
2596 if (Narrow)
2597 transform(Regs, Regs.begin(),
2598 WidenVector(*CurDAG));
2599
2600 SDValue RegSeq = createQTuple(Regs);
2601
2602 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2603 MVT::Other};
2604
2605 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2606
2607 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2608 N->getOperand(NumVecs + 2), // Base Register
2609 N->getOperand(NumVecs + 3), // Incremental
2610 N->getOperand(0)};
2611 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2612
2613 // Transfer memoperands.
2614 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2615 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2616
2617 ReplaceNode(N, St);
2618}
2619
2621 unsigned &Opc, SDValue &Opd0,
2622 unsigned &LSB, unsigned &MSB,
2623 unsigned NumberOfIgnoredLowBits,
2624 bool BiggerPattern) {
2625 assert(N->getOpcode() == ISD::AND &&
2626 "N must be a AND operation to call this function");
2627
2628 EVT VT = N->getValueType(0);
2629
2630 // Here we can test the type of VT and return false when the type does not
2631 // match, but since it is done prior to that call in the current context
2632 // we turned that into an assert to avoid redundant code.
2633 assert((VT == MVT::i32 || VT == MVT::i64) &&
2634 "Type checking must have been done before calling this function");
2635
2636 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2637 // changed the AND node to a 32-bit mask operation. We'll have to
2638 // undo that as part of the transform here if we want to catch all
2639 // the opportunities.
2640 // Currently the NumberOfIgnoredLowBits argument helps to recover
2641 // from these situations when matching bigger pattern (bitfield insert).
2642
2643 // For unsigned extracts, check for a shift right and mask
2644 uint64_t AndImm = 0;
2645 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2646 return false;
2647
2648 const SDNode *Op0 = N->getOperand(0).getNode();
2649
2650 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2651 // simplified. Try to undo that
2652 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2653
2654 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2655 if (AndImm & (AndImm + 1))
2656 return false;
2657
2658 bool ClampMSB = false;
2659 uint64_t SrlImm = 0;
2660 // Handle the SRL + ANY_EXTEND case.
2661 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2662 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2663 // Extend the incoming operand of the SRL to 64-bit.
2664 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2665 // Make sure to clamp the MSB so that we preserve the semantics of the
2666 // original operations.
2667 ClampMSB = true;
2668 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2670 SrlImm)) {
2671 // If the shift result was truncated, we can still combine them.
2672 Opd0 = Op0->getOperand(0).getOperand(0);
2673
2674 // Use the type of SRL node.
2675 VT = Opd0->getValueType(0);
2676 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2677 Opd0 = Op0->getOperand(0);
2678 ClampMSB = (VT == MVT::i32);
2679 } else if (BiggerPattern) {
2680 // Let's pretend a 0 shift right has been performed.
2681 // The resulting code will be at least as good as the original one
2682 // plus it may expose more opportunities for bitfield insert pattern.
2683 // FIXME: Currently we limit this to the bigger pattern, because
2684 // some optimizations expect AND and not UBFM.
2685 Opd0 = N->getOperand(0);
2686 } else
2687 return false;
2688
2689 // Bail out on large immediates. This happens when no proper
2690 // combining/constant folding was performed.
2691 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2692 LLVM_DEBUG(
2693 (dbgs() << N
2694 << ": Found large shift immediate, this should not happen\n"));
2695 return false;
2696 }
2697
2698 LSB = SrlImm;
2699 MSB = SrlImm +
2700 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2701 : llvm::countr_one<uint64_t>(AndImm)) -
2702 1;
2703 if (ClampMSB)
2704 // Since we're moving the extend before the right shift operation, we need
2705 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2706 // the zeros which would get shifted in with the original right shift
2707 // operation.
2708 MSB = MSB > 31 ? 31 : MSB;
2709
2710 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2711 return true;
2712}
2713
2715 SDValue &Opd0, unsigned &Immr,
2716 unsigned &Imms) {
2717 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2718
2719 EVT VT = N->getValueType(0);
2720 unsigned BitWidth = VT.getSizeInBits();
2721 assert((VT == MVT::i32 || VT == MVT::i64) &&
2722 "Type checking must have been done before calling this function");
2723
2724 SDValue Op = N->getOperand(0);
2725 if (Op->getOpcode() == ISD::TRUNCATE) {
2726 Op = Op->getOperand(0);
2727 VT = Op->getValueType(0);
2728 BitWidth = VT.getSizeInBits();
2729 }
2730
2731 uint64_t ShiftImm;
2732 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2733 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2734 return false;
2735
2736 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2737 if (ShiftImm + Width > BitWidth)
2738 return false;
2739
2740 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2741 Opd0 = Op.getOperand(0);
2742 Immr = ShiftImm;
2743 Imms = ShiftImm + Width - 1;
2744 return true;
2745}
2746
2748 SDValue &Opd0, unsigned &LSB,
2749 unsigned &MSB) {
2750 // We are looking for the following pattern which basically extracts several
2751 // continuous bits from the source value and places it from the LSB of the
2752 // destination value, all other bits of the destination value or set to zero:
2753 //
2754 // Value2 = AND Value, MaskImm
2755 // SRL Value2, ShiftImm
2756 //
2757 // with MaskImm >> ShiftImm to search for the bit width.
2758 //
2759 // This gets selected into a single UBFM:
2760 //
2761 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2762 //
2763
2764 if (N->getOpcode() != ISD::SRL)
2765 return false;
2766
2767 uint64_t AndMask = 0;
2768 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2769 return false;
2770
2771 Opd0 = N->getOperand(0).getOperand(0);
2772
2773 uint64_t SrlImm = 0;
2774 if (!isIntImmediate(N->getOperand(1), SrlImm))
2775 return false;
2776
2777 // Check whether we really have several bits extract here.
2778 if (!isMask_64(AndMask >> SrlImm))
2779 return false;
2780
2781 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2782 LSB = SrlImm;
2783 MSB = llvm::Log2_64(AndMask);
2784 return true;
2785}
2786
/// Match a bitfield extract rooted at a right-shift node.
///
/// Handles "(srl/sra (shl val, ShlImm), SrlImm)", "(srl (trunc val), SrlImm)"
/// (i64 -> i32 truncate), and -- when \p BiggerPattern is set -- a plain
/// right shift treated as having a zero left shift.
/// \param N             the ISD::SRA or ISD::SRL node to match.
/// \param Opc           [out] selected SBFM/UBFM machine opcode.
/// \param Opd0          [out] the source operand of the extract.
/// \param Immr, Imms    [out] the BFM immediates.
/// \param BiggerPattern true when matching on behalf of a larger pattern
///                      (e.g. bitfield insert), which tolerates looser matches.
/// \returns true if the node was matched as a single SBFM/UBFM.
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 could
    // be considered as setting high 32 bits as zero. Our strategy here is to
    // always generate 64bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  // BFM's immr is a right-rotate amount; the net right shift (SrlImm - ShlImm)
  // may be negative, in which case it wraps around the register width.
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  // The top (ShlImm + TruncBits) source bits are shifted out / known zero, so
  // the extracted field ends just below them.
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}
2853
2854bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2855 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2856
2857 EVT VT = N->getValueType(0);
2858 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2859 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2860 return false;
2861
2862 uint64_t ShiftImm;
2863 SDValue Op = N->getOperand(0);
2864 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2865 return false;
2866
2867 SDLoc dl(N);
2868 // Extend the incoming operand of the shift to 64-bits.
2869 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2870 unsigned Immr = ShiftImm;
2871 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2872 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2873 CurDAG->getTargetConstant(Imms, dl, VT)};
2874 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2875 return true;
2876}
2877
2878static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2879 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2880 unsigned NumberOfIgnoredLowBits = 0,
2881 bool BiggerPattern = false) {
2882 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2883 return false;
2884
2885 switch (N->getOpcode()) {
2886 default:
2887 if (!N->isMachineOpcode())
2888 return false;
2889 break;
2890 case ISD::AND:
2891 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2892 NumberOfIgnoredLowBits, BiggerPattern);
2893 case ISD::SRL:
2894 case ISD::SRA:
2895 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2896
2898 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2899 }
2900
2901 unsigned NOpc = N->getMachineOpcode();
2902 switch (NOpc) {
2903 default:
2904 return false;
2905 case AArch64::SBFMWri:
2906 case AArch64::UBFMWri:
2907 case AArch64::SBFMXri:
2908 case AArch64::UBFMXri:
2909 Opc = NOpc;
2910 Opd0 = N->getOperand(0);
2911 Immr = N->getConstantOperandVal(1);
2912 Imms = N->getConstantOperandVal(2);
2913 return true;
2914 }
2915 // Unreachable
2916 return false;
2917}
2918
2919bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2920 unsigned Opc, Immr, Imms;
2921 SDValue Opd0;
2922 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2923 return false;
2924
2925 EVT VT = N->getValueType(0);
2926 SDLoc dl(N);
2927
2928 // If the bit extract operation is 64bit but the original type is 32bit, we
2929 // need to add one EXTRACT_SUBREG.
2930 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2931 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2932 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2933
2934 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2935 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2936 MVT::i32, SDValue(BFM, 0));
2937 ReplaceNode(N, Inner.getNode());
2938 return true;
2939 }
2940
2941 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2942 CurDAG->getTargetConstant(Imms, dl, VT)};
2943 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2944 return true;
2945}
2946
2947/// Does DstMask form a complementary pair with the mask provided by
2948/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2949/// this asks whether DstMask zeroes precisely those bits that will be set by
2950/// the other half.
2951static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2952 unsigned NumberOfIgnoredHighBits, EVT VT) {
2953 assert((VT == MVT::i32 || VT == MVT::i64) &&
2954 "i32 or i64 mask type expected!");
2955 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2956
2957 // Enable implicitTrunc as we're intentionally ignoring high bits.
2958 APInt SignificantDstMask =
2959 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2960 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2961
2962 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2963 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2964}
2965
2966// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
2969// E.g., looking for useful bit of x
2970// 1. y = x & 0x7
2971// 2. z = y >> 2
2972// After #1, x useful bits are 0x7, then the useful bits of x, live through
2973// y.
2974// After #2, the useful bits of x are 0x4.
2975// However, if x is used on an unpredictable instruction, then all its bits
2976// are useful.
2977// E.g.
2978// 1. y = x & 0x7
2979// 2. z = y >> 2
2980// 3. str x, [@x]
2981static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2982
2984 unsigned Depth) {
2985 uint64_t Imm =
2986 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2987 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2988 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2989 getUsefulBits(Op, UsefulBits, Depth + 1);
2990}
2991
2993 uint64_t Imm, uint64_t MSB,
2994 unsigned Depth) {
2995 // inherit the bitwidth value
2996 APInt OpUsefulBits(UsefulBits);
2997 OpUsefulBits = 1;
2998
2999 if (MSB >= Imm) {
3000 OpUsefulBits <<= MSB - Imm + 1;
3001 --OpUsefulBits;
3002 // The interesting part will be in the lower part of the result
3003 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3004 // The interesting part was starting at Imm in the argument
3005 OpUsefulBits <<= Imm;
3006 } else {
3007 OpUsefulBits <<= MSB + 1;
3008 --OpUsefulBits;
3009 // The interesting part will be shifted in the result
3010 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3011 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3012 // The interesting part was at zero in the argument
3013 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3014 }
3015
3016 UsefulBits &= OpUsefulBits;
3017}
3018
3019static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3020 unsigned Depth) {
3021 uint64_t Imm =
3022 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3023 uint64_t MSB =
3024 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3025
3026 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3027}
3028
3030 unsigned Depth) {
3031 uint64_t ShiftTypeAndValue =
3032 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3033 APInt Mask(UsefulBits);
3034 Mask.clearAllBits();
3035 Mask.flipAllBits();
3036
3037 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3038 // Shift Left
3039 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3040 Mask <<= ShiftAmt;
3041 getUsefulBits(Op, Mask, Depth + 1);
3042 Mask.lshrInPlace(ShiftAmt);
3043 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3044 // Shift Right
3045 // We do not handle AArch64_AM::ASR, because the sign will change the
3046 // number of useful bits
3047 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3048 Mask.lshrInPlace(ShiftAmt);
3049 getUsefulBits(Op, Mask, Depth + 1);
3050 Mask <<= ShiftAmt;
3051 } else
3052 return;
3053
3054 UsefulBits &= Mask;
3055}
3056
/// "BFM dst, src, #immr, #imms": compute which bits of \p Orig are consumed,
/// where Orig may be either the preserved destination (operand 1) or the
/// inserted source (operand 0) of the bitfield move.
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();

  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  // Bits of the BFM result that its own users actually consume.
  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, ResultUsefulBits, Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    // OpUsefulBits: mask of the low 'Width' bits.
    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    // OpUsefulBits: mask of 'Width' bits positioned at LSB.
    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    }

    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}
3111
3112static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3113 SDValue Orig, unsigned Depth) {
3114
3115 // Users of this node should have already been instruction selected
3116 // FIXME: Can we turn that into an assert?
3117 if (!UserNode->isMachineOpcode())
3118 return;
3119
3120 switch (UserNode->getMachineOpcode()) {
3121 default:
3122 return;
3123 case AArch64::ANDSWri:
3124 case AArch64::ANDSXri:
3125 case AArch64::ANDWri:
3126 case AArch64::ANDXri:
3127 // We increment Depth only when we call the getUsefulBits
3128 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3129 Depth);
3130 case AArch64::UBFMWri:
3131 case AArch64::UBFMXri:
3132 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3133
3134 case AArch64::ORRWrs:
3135 case AArch64::ORRXrs:
3136 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3137 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3138 Depth);
3139 return;
3140 case AArch64::BFMWri:
3141 case AArch64::BFMXri:
3142 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3143
3144 case AArch64::STRBBui:
3145 case AArch64::STURBBi:
3146 if (UserNode->getOperand(0) != Orig)
3147 return;
3148 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3149 return;
3150
3151 case AArch64::STRHHui:
3152 case AArch64::STURHHi:
3153 if (UserNode->getOperand(0) != Orig)
3154 return;
3155 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3156 return;
3157 }
3158}
3159
3160static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3162 return;
3163 // Initialize UsefulBits
3164 if (!Depth) {
3165 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3166 // At the beginning, assume every produced bits is useful
3167 UsefulBits = APInt(Bitwidth, 0);
3168 UsefulBits.flipAllBits();
3169 }
3170 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3171
3172 for (SDNode *Node : Op.getNode()->users()) {
3173 // A use cannot produce useful bits
3174 APInt UsefulBitsForUse = APInt(UsefulBits);
3175 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3176 UsersUsefulBits |= UsefulBitsForUse;
3177 }
3178 // UsefulBits contains the produced bits that are meaningful for the
3179 // current definition, thus a user cannot make a bit meaningful at
3180 // this point
3181 UsefulBits &= UsersUsefulBits;
3182}
3183
3184/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3185/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3186/// 0, return Op unchanged.
3187static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3188 if (ShlAmount == 0)
3189 return Op;
3190
3191 EVT VT = Op.getValueType();
3192 SDLoc dl(Op);
3193 unsigned BitWidth = VT.getSizeInBits();
3194 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3195
3196 SDNode *ShiftNode;
3197 if (ShlAmount > 0) {
3198 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3199 ShiftNode = CurDAG->getMachineNode(
3200 UBFMOpc, dl, VT, Op,
3201 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3202 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3203 } else {
3204 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3205 assert(ShlAmount < 0 && "expected right shift");
3206 int ShrAmount = -ShlAmount;
3207 ShiftNode = CurDAG->getMachineNode(
3208 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3209 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3210 }
3211
3212 return SDValue(ShiftNode, 0);
3213}
3214
3215// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3216static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3217 bool BiggerPattern,
3218 const uint64_t NonZeroBits,
3219 SDValue &Src, int &DstLSB,
3220 int &Width);
3221
// For bit-field-positioning pattern "(shl VAL, N)".
3223static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3224 bool BiggerPattern,
3225 const uint64_t NonZeroBits,
3226 SDValue &Src, int &DstLSB,
3227 int &Width);
3228
3229/// Does this tree qualify as an attempt to move a bitfield into position,
3230/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3232 bool BiggerPattern, SDValue &Src,
3233 int &DstLSB, int &Width) {
3234 EVT VT = Op.getValueType();
3235 unsigned BitWidth = VT.getSizeInBits();
3236 (void)BitWidth;
3237 assert(BitWidth == 32 || BitWidth == 64);
3238
3239 KnownBits Known = CurDAG->computeKnownBits(Op);
3240
3241 // Non-zero in the sense that they're not provably zero, which is the key
3242 // point if we want to use this value
3243 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3244 if (!isShiftedMask_64(NonZeroBits))
3245 return false;
3246
3247 switch (Op.getOpcode()) {
3248 default:
3249 break;
3250 case ISD::AND:
3251 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3252 NonZeroBits, Src, DstLSB, Width);
3253 case ISD::SHL:
3254 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3255 NonZeroBits, Src, DstLSB, Width);
3256 }
3257
3258 return false;
3259}
3260
3262 bool BiggerPattern,
3263 const uint64_t NonZeroBits,
3264 SDValue &Src, int &DstLSB,
3265 int &Width) {
3266 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3267
3268 EVT VT = Op.getValueType();
3269 assert((VT == MVT::i32 || VT == MVT::i64) &&
3270 "Caller guarantees VT is one of i32 or i64");
3271 (void)VT;
3272
3273 uint64_t AndImm;
3274 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3275 return false;
3276
3277 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3278 // 1) (AndImm & (1 << POS) == 0)
3279 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3280 //
3281 // 1) and 2) don't agree so something must be wrong (e.g., in
3282 // 'SelectionDAG::computeKnownBits')
3283 assert((~AndImm & NonZeroBits) == 0 &&
3284 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3285
3286 SDValue AndOp0 = Op.getOperand(0);
3287
3288 uint64_t ShlImm;
3289 SDValue ShlOp0;
3290 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3291 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3292 ShlOp0 = AndOp0.getOperand(0);
3293 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3295 ShlImm)) {
3296 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3297
3298 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3299 SDValue ShlVal = AndOp0.getOperand(0);
3300
3301 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3302 // expect VT to be MVT::i32.
3303 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3304
3305 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3306 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3307 } else
3308 return false;
3309
3310 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3311 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3312 // AndOp0+AND.
3313 if (!BiggerPattern && !AndOp0.hasOneUse())
3314 return false;
3315
3316 DstLSB = llvm::countr_zero(NonZeroBits);
3317 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3318
3319 // Bail out on large Width. This happens when no proper combining / constant
3320 // folding was performed.
3321 if (Width >= (int)VT.getSizeInBits()) {
3322 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3323 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3324 // "val".
3325 // If VT is i32, what Width >= 32 means:
3326 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3327 // demands at least 'Width' bits (after dag-combiner). This together with
3328 // `any_extend` Op (undefined higher bits) indicates missed combination
3329 // when lowering the 'and' IR instruction to an machine IR instruction.
3330 LLVM_DEBUG(
3331 dbgs()
3332 << "Found large Width in bit-field-positioning -- this indicates no "
3333 "proper combining / constant folding was performed\n");
3334 return false;
3335 }
3336
3337 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3338 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3339 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3340 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3341 // which case it is not profitable to insert an extra shift.
3342 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3343 return false;
3344
3345 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3346 return true;
3347}
3348
3349// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3350// UBFIZ.
3352 SDValue &Src, int &DstLSB,
3353 int &Width) {
3354 // Caller should have verified that N is a left shift with constant shift
3355 // amount; asserts that.
3356 assert(Op.getOpcode() == ISD::SHL &&
3357 "Op.getNode() should be a SHL node to call this function");
3358 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3359 "Op.getNode() should shift ShlImm to call this function");
3360
3361 uint64_t AndImm = 0;
3362 SDValue Op0 = Op.getOperand(0);
3363 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3364 return false;
3365
3366 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3367 if (isMask_64(ShiftedAndImm)) {
3368 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3369 // should end with Mask, and could be prefixed with random bits if those
3370 // bits are shifted out.
3371 //
3372 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3373 // the AND result corresponding to those bits are shifted out, so it's fine
3374 // to not extract them.
3375 Width = llvm::countr_one(ShiftedAndImm);
3376 DstLSB = ShlImm;
3377 Src = Op0.getOperand(0);
3378 return true;
3379 }
3380 return false;
3381}
3382
3384 bool BiggerPattern,
3385 const uint64_t NonZeroBits,
3386 SDValue &Src, int &DstLSB,
3387 int &Width) {
3388 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3389
3390 EVT VT = Op.getValueType();
3391 assert((VT == MVT::i32 || VT == MVT::i64) &&
3392 "Caller guarantees that type is i32 or i64");
3393 (void)VT;
3394
3395 uint64_t ShlImm;
3396 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3397 return false;
3398
3399 if (!BiggerPattern && !Op.hasOneUse())
3400 return false;
3401
3402 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3403 return true;
3404
3405 DstLSB = llvm::countr_zero(NonZeroBits);
3406 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3407
3408 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3409 return false;
3410
3411 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3412 return true;
3413}
3414
3415static bool isShiftedMask(uint64_t Mask, EVT VT) {
3416 assert(VT == MVT::i32 || VT == MVT::i64);
3417 if (VT == MVT::i32)
3418 return isShiftedMask_32(Mask);
3419 return isShiftedMask_64(Mask);
3420}
3421
3422// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3423// inserted only sets known zero bits.
3425 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3426
3427 EVT VT = N->getValueType(0);
3428 if (VT != MVT::i32 && VT != MVT::i64)
3429 return false;
3430
3431 unsigned BitWidth = VT.getSizeInBits();
3432
3433 uint64_t OrImm;
3434 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3435 return false;
3436
3437 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3438 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3439 // performance neutral.
3441 return false;
3442
3443 uint64_t MaskImm;
3444 SDValue And = N->getOperand(0);
3445 // Must be a single use AND with an immediate operand.
3446 if (!And.hasOneUse() ||
3447 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3448 return false;
3449
3450 // Compute the Known Zero for the AND as this allows us to catch more general
3451 // cases than just looking for AND with imm.
3452 KnownBits Known = CurDAG->computeKnownBits(And);
3453
3454 // Non-zero in the sense that they're not provably zero, which is the key
3455 // point if we want to use this value.
3456 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3457
3458 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3459 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3460 return false;
3461
3462 // The bits being inserted must only set those bits that are known to be zero.
3463 if ((OrImm & NotKnownZero) != 0) {
3464 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3465 // currently handle this case.
3466 return false;
3467 }
3468
3469 // BFI/BFXIL dst, src, #lsb, #width.
3470 int LSB = llvm::countr_one(NotKnownZero);
3471 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3472
3473 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3474 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3475 unsigned ImmS = Width - 1;
3476
3477 // If we're creating a BFI instruction avoid cases where we need more
3478 // instructions to materialize the BFI constant as compared to the original
3479 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3480 // should be no worse in this case.
3481 bool IsBFI = LSB != 0;
3482 uint64_t BFIImm = OrImm >> LSB;
3483 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3484 // We have a BFI instruction and we know the constant can't be materialized
3485 // with a ORR-immediate with the zero register.
3486 unsigned OrChunks = 0, BFIChunks = 0;
3487 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3488 if (((OrImm >> Shift) & 0xFFFF) != 0)
3489 ++OrChunks;
3490 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3491 ++BFIChunks;
3492 }
3493 if (BFIChunks > OrChunks)
3494 return false;
3495 }
3496
3497 // Materialize the constant to be inserted.
3498 SDLoc DL(N);
3499 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3500 SDNode *MOVI = CurDAG->getMachineNode(
3501 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3502
3503 // Create the BFI/BFXIL instruction.
3504 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3505 CurDAG->getTargetConstant(ImmR, DL, VT),
3506 CurDAG->getTargetConstant(ImmS, DL, VT)};
3507 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3508 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3509 return true;
3510}
3511
3513 SDValue &ShiftedOperand,
3514 uint64_t &EncodedShiftImm) {
3515 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3516 if (!Dst.hasOneUse())
3517 return false;
3518
3519 EVT VT = Dst.getValueType();
3520 assert((VT == MVT::i32 || VT == MVT::i64) &&
3521 "Caller should guarantee that VT is one of i32 or i64");
3522 const unsigned SizeInBits = VT.getSizeInBits();
3523
3524 SDLoc DL(Dst.getNode());
3525 uint64_t AndImm, ShlImm;
3526 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3527 isShiftedMask_64(AndImm)) {
3528 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3529 SDValue DstOp0 = Dst.getOperand(0);
3530 if (!DstOp0.hasOneUse())
3531 return false;
3532
3533 // An example to illustrate the transformation
3534 // From:
3535 // lsr x8, x1, #1
3536 // and x8, x8, #0x3f80
3537 // bfxil x8, x1, #0, #7
3538 // To:
3539 // and x8, x23, #0x7f
3540 // ubfx x9, x23, #8, #7
3541 // orr x23, x8, x9, lsl #7
3542 //
3543 // The number of instructions remains the same, but ORR is faster than BFXIL
3544 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3545 // the dependency chain is improved after the transformation.
3546 uint64_t SrlImm;
3547 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3548 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3549 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3550 unsigned MaskWidth =
3551 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3552 unsigned UBFMOpc =
3553 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3554 SDNode *UBFMNode = CurDAG->getMachineNode(
3555 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3556 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3557 VT),
3558 CurDAG->getTargetConstant(
3559 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3560 ShiftedOperand = SDValue(UBFMNode, 0);
3561 EncodedShiftImm = AArch64_AM::getShifterImm(
3562 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3563 return true;
3564 }
3565 }
3566 return false;
3567 }
3568
3569 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3570 ShiftedOperand = Dst.getOperand(0);
3571 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3572 return true;
3573 }
3574
3575 uint64_t SrlImm;
3576 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3577 ShiftedOperand = Dst.getOperand(0);
3578 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3579 return true;
3580 }
3581 return false;
3582}
3583
3584// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3585// the operands and select it to AArch64::ORR with shifted registers if
3586// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3587static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3588 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3589 const bool BiggerPattern) {
3590 EVT VT = N->getValueType(0);
3591 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3592 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3593 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3594 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3595 assert((VT == MVT::i32 || VT == MVT::i64) &&
3596 "Expect result type to be i32 or i64 since N is combinable to BFM");
3597 SDLoc DL(N);
3598
3599 // Bail out if BFM simplifies away one node in BFM Dst.
3600 if (OrOpd1 != Dst)
3601 return false;
3602
3603 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3604 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3605 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3606 if (BiggerPattern) {
3607 uint64_t SrcAndImm;
3608 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3609 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3610 // OrOpd0 = AND Src, #Mask
3611 // So BFM simplifies away one AND node from Src and doesn't simplify away
3612 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3613 // one node (from Rd), ORR is better since it has higher throughput and
3614 // smaller latency than BFM on many AArch64 processors (and for the rest
3615 // ORR is at least as good as BFM).
3616 SDValue ShiftedOperand;
3617 uint64_t EncodedShiftImm;
3618 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3619 EncodedShiftImm)) {
3620 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3621 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3622 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3623 return true;
3624 }
3625 }
3626 return false;
3627 }
3628
3629 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3630
3631 uint64_t ShlImm;
3632 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3633 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3634 SDValue Ops[] = {
3635 Dst, Src,
3636 CurDAG->getTargetConstant(
3638 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3639 return true;
3640 }
3641
3642 // Select the following pattern to left-shifted operand rather than BFI.
3643 // %val1 = op ..
3644 // %val2 = shl %val1, #imm
3645 // %res = or %val1, %val2
3646 //
3647 // If N is selected to be BFI, we know that
3648 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3649 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3650 //
3651 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3652 if (OrOpd0.getOperand(0) == OrOpd1) {
3653 SDValue Ops[] = {
3654 OrOpd1, OrOpd1,
3655 CurDAG->getTargetConstant(
3657 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3658 return true;
3659 }
3660 }
3661
3662 uint64_t SrlImm;
3663 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3664 // Select the following pattern to right-shifted operand rather than BFXIL.
3665 // %val1 = op ..
3666 // %val2 = lshr %val1, #imm
3667 // %res = or %val1, %val2
3668 //
3669 // If N is selected to be BFXIL, we know that
3670 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3671 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3672 //
3673 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3674 if (OrOpd0.getOperand(0) == OrOpd1) {
3675 SDValue Ops[] = {
3676 OrOpd1, OrOpd1,
3677 CurDAG->getTargetConstant(
3679 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3680 return true;
3681 }
3682 }
3683
3684 return false;
3685}
3686
// Try to select an i32/i64 ISD::OR node N as a bitfield insert (BFM/BFXIL).
// UsefulBits describes which bits of N's result are actually observed by its
// users; bits outside it can be clobbered freely. Returns true and replaces N
// on success.
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given a OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  // isBitfieldExtractOp)
  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
  // countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    // I in {0,1} tries BiggerPattern=false, I in {2,3} BiggerPattern=true;
    // I%2 swaps the operand order.
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion we may
      // want to widen the pattern if we want to grab general bitfield
      // move case
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      // BFM encodes the destination LSB as a rotate-right amount (ImmR) and
      // the field width via the top source bit index (ImmS).
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows to catch more general case than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
    // with shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // both parts match
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
    }

    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = llvm::countr_zero(Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    // If the source is itself a right shift by a constant, fold it into the
    // UBFM shift amount (only while the combined amount stays in range).
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  return false;
}
3860
3861bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3862 if (N->getOpcode() != ISD::OR)
3863 return false;
3864
3865 APInt NUsefulBits;
3866 getUsefulBits(SDValue(N, 0), NUsefulBits);
3867
3868 // If all bits are not useful, just return UNDEF.
3869 if (!NUsefulBits) {
3870 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3871 return true;
3872 }
3873
3874 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3875 return true;
3876
3877 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3878}
3879
3880/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3881/// equivalent of a left shift by a constant amount followed by an and masking
3882/// out a contiguous set of bits.
3883bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3884 if (N->getOpcode() != ISD::AND)
3885 return false;
3886
3887 EVT VT = N->getValueType(0);
3888 if (VT != MVT::i32 && VT != MVT::i64)
3889 return false;
3890
3891 SDValue Op0;
3892 int DstLSB, Width;
3893 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3894 Op0, DstLSB, Width))
3895 return false;
3896
3897 // ImmR is the rotate right amount.
3898 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3899 // ImmS is the most significant bit of the source to be moved.
3900 unsigned ImmS = Width - 1;
3901
3902 SDLoc DL(N);
3903 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3904 CurDAG->getTargetConstant(ImmS, DL, VT)};
3905 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3906 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3907 return true;
3908}
3909
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  // Map the shift/rotate opcode to the matching variable-shift machine
  // instruction; anything else is not handled here.
  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  // Size is the operation width; the instructions above only consume the low
  // `Bits` bits of the amount register, i.e. the amount is taken mod Size.
  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
      // to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
      // to generate a NOT instead of a SUB from a constant.
      // NOT is built as ORN with the zero register.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers the
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
    // the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
                                                NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
4034
4036 SDValue &FixedPos,
4037 unsigned RegWidth,
4038 bool isReciprocal) {
4039 APFloat FVal(0.0);
4041 FVal = CN->getValueAPF();
4042 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4043 // Some otherwise illegal constants are allowed in this case.
4044 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4045 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4046 return false;
4047
4048 ConstantPoolSDNode *CN =
4049 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4050 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4051 } else
4052 return false;
4053
4054 if (unsigned FBits =
4055 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4056 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4057 return true;
4058 }
4059
4060 return false;
4061}
4062
4063bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4064 unsigned RegWidth) {
4065 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4066 /*isReciprocal*/ false);
4067}
4068
// Match a vector constant materialised as a MOVIshift/FMOV/DUP immediate,
// reconstruct its floating-point value, and hand it to
// CheckFixedPointOperandConstant; on success FixedPos receives the FBits
// count as an i32 target constant.
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
                                                 unsigned RegWidth) {
  // Look through casts that keep the element size unchanged.
  if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
      N.getValueType().getScalarSizeInBits() ==
          N.getOperand(0).getValueType().getScalarSizeInBits())
    N = N.getOperand(0);

  // Reinterpret a raw bit pattern as an IEEE float of the requested width.
  auto ImmToFloat = [RegWidth](APInt Imm) {
    switch (RegWidth) {
    case 16:
      return APFloat(APFloat::IEEEhalf(), Imm);
    case 32:
      return APFloat(APFloat::IEEEsingle(), Imm);
    case 64:
      return APFloat(APFloat::IEEEdouble(), Imm);
    default:
      llvm_unreachable("Unexpected RegWidth!");
    };
  };

  APFloat FVal(0.0);
  switch (N->getOpcode()) {
  case AArch64ISD::MOVIshift:
    // Immediate is operand 0 shifted left by operand 1.
    FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
                                          << N.getConstantOperandVal(1)));
    break;
  case AArch64ISD::FMOV:
    // FMOV immediates use the AdvSIMD modified-immediate encodings
    // (type 11 for 32-bit, type 12 for 64-bit).
    assert(RegWidth == 32 || RegWidth == 64);
    if (RegWidth == 32)
      FVal = ImmToFloat(
          APInt(RegWidth, (uint32_t)AArch64_AM::decodeAdvSIMDModImmType11(
                              N.getConstantOperandVal(0))));
    else
      FVal = ImmToFloat(APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(
                                            N.getConstantOperandVal(0))));
    break;
  case AArch64ISD::DUP:
    // Splat of a scalar constant; only constant operands are usable.
    if (isa<ConstantSDNode>(N.getOperand(0)))
      FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
    else
      return false;
    break;
  default:
    return false;
  }

  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false)) {
    FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
4123
4124bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4125 SDValue &FixedPos,
4126 unsigned RegWidth) {
4127 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4128 /*isReciprocal*/ true);
4129}
4130
4131// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4132// of the string and obtains the integer values from them and combines these
4133// into a single value to be used in the MRS/MSR instruction.
4136 RegString.split(Fields, ':');
4137
4138 if (Fields.size() == 1)
4139 return -1;
4140
4141 assert(Fields.size() == 5
4142 && "Invalid number of fields in read register string");
4143
4145 bool AllIntFields = true;
4146
4147 for (StringRef Field : Fields) {
4148 unsigned IntField;
4149 AllIntFields &= !Field.getAsInteger(10, IntField);
4150 Ops.push_back(IntField);
4151 }
4152
4153 assert(AllIntFields &&
4154 "Unexpected non-integer value in special register string.");
4155 (void)AllIntFields;
4156
4157 // Need to combine the integer fields of the string into a single value
4158 // based on the bit encoding of MRS/MSR instruction.
4159 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4160 (Ops[3] << 3) | (Ops[4]);
4161}
4162
4163// Lower the read_register intrinsic to an MRS instruction node if the special
4164// register string argument is either of the form detailed in the ALCE (the
4165// form described in getIntOperandsFromRegisterString) or is a named register
4166// known by the MRS SysReg mapper.
4167bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4168 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4169 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4170 SDLoc DL(N);
4171
4172 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4173
4174 unsigned Opcode64Bit = AArch64::MRS;
4175 int Imm = getIntOperandFromRegisterString(RegString->getString());
4176 if (Imm == -1) {
4177 // No match, Use the sysreg mapper to map the remaining possible strings to
4178 // the value for the register to be used for the instruction operand.
4179 const auto *TheReg =
4180 AArch64SysReg::lookupSysRegByName(RegString->getString());
4181 if (TheReg && TheReg->Readable &&
4182 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4183 Imm = TheReg->Encoding;
4184 else
4185 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4186
4187 if (Imm == -1) {
4188 // Still no match, see if this is "pc" or give up.
4189 if (!ReadIs128Bit && RegString->getString() == "pc") {
4190 Opcode64Bit = AArch64::ADR;
4191 Imm = 0;
4192 } else {
4193 return false;
4194 }
4195 }
4196 }
4197
4198 SDValue InChain = N->getOperand(0);
4199 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4200 if (!ReadIs128Bit) {
4201 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4202 {SysRegImm, InChain});
4203 } else {
4204 SDNode *MRRS = CurDAG->getMachineNode(
4205 AArch64::MRRS, DL,
4206 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4207 {SysRegImm, InChain});
4208
4209 // Sysregs are not endian. The even register always contains the low half
4210 // of the register.
4211 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4212 SDValue(MRRS, 0));
4213 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4214 SDValue(MRRS, 0));
4215 SDValue OutChain = SDValue(MRRS, 1);
4216
4217 ReplaceUses(SDValue(N, 0), Lo);
4218 ReplaceUses(SDValue(N, 1), Hi);
4219 ReplaceUses(SDValue(N, 2), OutChain);
4220 };
4221 return true;
4222}
4223
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  // Operand 1 carries the register name as metadata.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument, we know that this is
    // the case as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(2);
        CurDAG->SelectNodeTo(
            N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
            CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
        return true;
      }
      return false;
    };

    // Try the 4-bit then the 1-bit pstate immediate forms.
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         N->getOperand(2), InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd supreg.
    SDNode *Pair = CurDAG->getMachineNode(
        TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
        {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   MVT::i32),
         N->getOperand(2),
         CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
         N->getOperand(3),
         CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});

    CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         SDValue(Pair, 0), InChain);
  }

  return true;
}
4305
4306/// We've got special pseudo-instructions for these
4307bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4308 unsigned Opcode;
4309 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4310
4311 // Leave IR for LSE if subtarget supports it.
4312 if (Subtarget->hasLSE()) return false;
4313
4314 if (MemTy == MVT::i8)
4315 Opcode = AArch64::CMP_SWAP_8;
4316 else if (MemTy == MVT::i16)
4317 Opcode = AArch64::CMP_SWAP_16;
4318 else if (MemTy == MVT::i32)
4319 Opcode = AArch64::CMP_SWAP_32;
4320 else if (MemTy == MVT::i64)
4321 Opcode = AArch64::CMP_SWAP_64;
4322 else
4323 llvm_unreachable("Unknown AtomicCmpSwap type");
4324
4325 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4326 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4327 N->getOperand(0)};
4328 SDNode *CmpSwap = CurDAG->getMachineNode(
4329 Opcode, SDLoc(N),
4330 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4331
4332 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4333 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4334
4335 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4336 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4337 CurDAG->RemoveDeadNode(N);
4338
4339 return true;
4340}
4341
4342bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4343 SDValue &Shift, bool Negate) {
4344 if (!isa<ConstantSDNode>(N))
4345 return false;
4346
4347 SDLoc DL(N);
4348 APInt Val =
4349 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4350
4351 if (Negate)
4352 Val = -Val;
4353
4354 switch (VT.SimpleTy) {
4355 case MVT::i8:
4356 // All immediates are supported.
4357 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4358 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4359 return true;
4360 case MVT::i16:
4361 case MVT::i32:
4362 case MVT::i64:
4363 // Support 8bit unsigned immediates.
4364 if ((Val & ~0xff) == 0) {
4365 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4366 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4367 return true;
4368 }
4369 // Support 16bit unsigned immediates that are a multiple of 256.
4370 if ((Val & ~0xff00) == 0) {
4371 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4372 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4373 return true;
4374 }
4375 break;
4376 default:
4377 break;
4378 }
4379
4380 return false;
4381}
4382
4383bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4384 SDValue &Imm, SDValue &Shift,
4385 bool Negate) {
4386 if (!isa<ConstantSDNode>(N))
4387 return false;
4388
4389 SDLoc DL(N);
4390 int64_t Val = cast<ConstantSDNode>(N)
4391 ->getAPIntValue()
4393 .getSExtValue();
4394
4395 if (Negate)
4396 Val = -Val;
4397
4398 // Signed saturating instructions treat their immediate operand as unsigned,
4399 // whereas the related intrinsics define their operands to be signed. This
4400 // means we can only use the immediate form when the operand is non-negative.
4401 if (Val < 0)
4402 return false;
4403
4404 switch (VT.SimpleTy) {
4405 case MVT::i8:
4406 // All positive immediates are supported.
4407 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4408 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4409 return true;
4410 case MVT::i16:
4411 case MVT::i32:
4412 case MVT::i64:
4413 // Support 8bit positive immediates.
4414 if (Val <= 255) {
4415 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4416 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4417 return true;
4418 }
4419 // Support 16bit positive immediates that are a multiple of 256.
4420 if (Val <= 65280 && Val % 256 == 0) {
4421 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4422 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4423 return true;
4424 }
4425 break;
4426 default:
4427 break;
4428 }
4429
4430 return false;
4431}
4432
4433bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4434 SDValue &Shift) {
4435 if (!isa<ConstantSDNode>(N))
4436 return false;
4437
4438 SDLoc DL(N);
4439 int64_t Val = cast<ConstantSDNode>(N)
4440 ->getAPIntValue()
4441 .trunc(VT.getFixedSizeInBits())
4442 .getSExtValue();
4443 int32_t ImmVal, ShiftVal;
4444 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4445 ShiftVal))
4446 return false;
4447
4448 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4449 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4450 return true;
4451}
4452
4453bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4454 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4455 int64_t ImmVal = CNode->getSExtValue();
4456 SDLoc DL(N);
4457 if (ImmVal >= -128 && ImmVal < 128) {
4458 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4459 return true;
4460 }
4461 }
4462 return false;
4463}
4464
4465bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4466 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4467 uint64_t ImmVal = CNode->getZExtValue();
4468
4469 switch (VT.SimpleTy) {
4470 case MVT::i8:
4471 ImmVal &= 0xFF;
4472 break;
4473 case MVT::i16:
4474 ImmVal &= 0xFFFF;
4475 break;
4476 case MVT::i32:
4477 ImmVal &= 0xFFFFFFFF;
4478 break;
4479 case MVT::i64:
4480 break;
4481 default:
4482 llvm_unreachable("Unexpected type");
4483 }
4484
4485 if (ImmVal < 256) {
4486 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4487 return true;
4488 }
4489 }
4490 return false;
4491}
4492
4493bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4494 bool Invert) {
4495 uint64_t ImmVal;
4496 if (auto CI = dyn_cast<ConstantSDNode>(N))
4497 ImmVal = CI->getZExtValue();
4498 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4499 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4500 else
4501 return false;
4502
4503 if (Invert)
4504 ImmVal = ~ImmVal;
4505
4506 uint64_t encoding;
4507 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4508 return false;
4509
4510 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4511 return true;
4512}
4513
4514// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4515// Rather than attempt to normalise everything we can sometimes saturate the
4516// shift amount during selection. This function also allows for consistent
4517// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4518// required by the instructions.
4519bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4520 uint64_t High, bool AllowSaturation,
4521 SDValue &Imm) {
4522 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4523 uint64_t ImmVal = CN->getZExtValue();
4524
4525 // Reject shift amounts that are too small.
4526 if (ImmVal < Low)
4527 return false;
4528
4529 // Reject or saturate shift amounts that are too big.
4530 if (ImmVal > High) {
4531 if (!AllowSaturation)
4532 return false;
4533 ImmVal = High;
4534 }
4535
4536 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4537 return true;
4538 }
4539
4540 return false;
4541}
4542
4543bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4544 // tagp(FrameIndex, IRGstack, tag_offset):
4545 // since the offset between FrameIndex and IRGstack is a compile-time
4546 // constant, this can be lowered to a single ADDG instruction.
4547 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4548 return false;
4549 }
4550
4551 SDValue IRG_SP = N->getOperand(2);
4552 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4553 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4554 return false;
4555 }
4556
4557 const TargetLowering *TLI = getTargetLowering();
4558 SDLoc DL(N);
4559 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4560 SDValue FiOp = CurDAG->getTargetFrameIndex(
4561 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4562 int TagOffset = N->getConstantOperandVal(3);
4563
4564 SDNode *Out = CurDAG->getMachineNode(
4565 AArch64::TAGPstack, DL, MVT::i64,
4566 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4567 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4568 ReplaceNode(N, Out);
4569 return true;
4570}
4571
4572void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4573 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4574 "llvm.aarch64.tagp third argument must be an immediate");
4575 if (trySelectStackSlotTagP(N))
4576 return;
4577 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4578 // compile-time constant, not just for stack allocations.
4579
4580 // General case for unrelated pointers in Op1 and Op2.
4581 SDLoc DL(N);
4582 int TagOffset = N->getConstantOperandVal(3);
4583 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4584 {N->getOperand(1), N->getOperand(2)});
4585 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4586 {SDValue(N1, 0), N->getOperand(2)});
4587 SDNode *N3 = CurDAG->getMachineNode(
4588 AArch64::ADDG, DL, MVT::i64,
4589 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4590 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4591 ReplaceNode(N, N3);
4592}
4593
// "Cast" a fixed-length vector into a scalable one by forcing the operand
// into a ZPR register class via COPY_TO_REGCLASS. Returns false when normal
// isel should handle the insert_subvector instead.
bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like insert_subvector.
  if (N->getConstantOperandVal(2) != 0)
    return false;
  if (!N->getOperand(0).isUndef())
    return false;

  // Bail when normal isel should do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(1).getValueType();
  if (VT.isFixedLengthVector() || InVT.isScalableVector())
    return false;
  if (InVT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  // NOTE(review): the first line of this assert (its condition) appears to be
  // missing from this extract of the file -- confirm against the full source.
         "Expected to insert into a packed scalable vector!");

  SDLoc DL(N);
  // Re-class the operand as a ZPR so downstream patterns see an SVE register.
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(1), RC));
  return true;
}
4624
// "Cast" a scalable vector into a fixed-length one by forcing the operand
// into a ZPR register class via COPY_TO_REGCLASS. Returns false when normal
// isel can handle the extract_subvector instead.
bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like extract_subvector.
  if (N->getConstantOperandVal(1) != 0)
    return false;

  // Bail when normal isel can do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(0).getValueType();
  if (VT.isScalableVector() || InVT.isFixedLengthVector())
    return false;
  if (VT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  // NOTE(review): the first line of this assert (its condition) appears to be
  // missing from this extract of the file -- confirm against the full source.
         "Expected to extract from a packed scalable vector!");

  SDLoc DL(N);
  // Re-class the operand as a ZPR so downstream patterns see an SVE register.
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(0), RC));
  return true;
}
4653
// Try to select an OR node as an XAR (exclusive-or and rotate) instruction.
// Covers two shapes: the SVE2/SME predicated-shift form on scalable vectors,
// and the Neon SHA3 / SVE2 fixed-vector form. Returns true and replaces \p N
// on success; false leaves the node for normal isel.
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // Rotate by a constant is a funnel shift in IR which is exanded to
  // an OR with shifted operands.
  // We do the following transform:
  // OR N0, N1 -> xar (x, y, imm)
  // Where:
  // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
  // N0 = SHL_PRED true, V, splat(bits-imm)
  // V = (xor x, y)
  if (VT.isScalableVector() &&
      (Subtarget->hasSVE2() ||
       (Subtarget->hasSME() && Subtarget->isStreaming()))) {
    // Canonicalise so N0 is the SHL_PRED and N1 the SRL_PRED.
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(N0, N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    // Both shifts must have an all-active governing predicate.
    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
        !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
      return false;

    // Both shifts must act on the same value V.
    if (N0.getOperand(1) != N1.getOperand(1))
      return false;

    SDValue R1, R2;
    bool IsXOROperand = true;
    if (N0.getOperand(1).getOpcode() != ISD::XOR) {
      IsXOROperand = false;
    } else {
      R1 = N0.getOperand(1).getOperand(0);
      R2 = N1.getOperand(1).getOperand(1);
    }

    APInt ShlAmt, ShrAmt;
    // NOTE(review): the second half of this splat-constant condition (the
    // check that also extracts ShrAmt from N1's shift amount) appears to be
    // missing from this extract of the file -- confirm against full source.
    if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
      return false;

    // The two amounts must together form a full element-width rotate.
    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    if (!IsXOROperand) {
      // No XOR feeding the shifts: synthesise a zero second operand via
      // MOVI (v2d, #0) widened with SUBREG_TO_REG.
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
      SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
      SDValue MOVIV = SDValue(MOV, 0);

      SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
      SDNode *SubRegToReg =
          CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);

      R1 = N1->getOperand(1);
      R2 = SDValue(SubRegToReg, 0);
    }

    // XAR encodes the rotate-right amount as an i32 immediate.
    SDValue Imm =
        CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);

    SDValue Ops[] = {R1, R2, Imm};
    // NOTE(review): the opening line of this opcode-selection `if` appears to
    // be missing from this extract of the file -- confirm against full source.
            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, Opc, VT, Ops);
      return true;
    }
    return false;
  }

  // We have Neon SHA3 XAR operation for v2i64 but for types
  // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
  // is available.
  EVT SVT;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v4i32:
  case MVT::v2i32:
    SVT = MVT::nxv4i32;
    break;
  case MVT::v8i16:
  case MVT::v4i16:
    SVT = MVT::nxv8i16;
    break;
  case MVT::v16i8:
  case MVT::v8i8:
    SVT = MVT::nxv16i8;
    break;
  case MVT::v2i64:
  case MVT::v1i64:
    SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
    break;
  default:
    return false;
  }

  // Require the feature matching the container type chosen above.
  if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
      (SVT.isScalableVector() && !Subtarget->hasSVE2()))
    return false;

  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  // Both shifts must act on the same value.
  if (N0->getOperand(0) != N1->getOperand(0))
    return false;

  SDValue R1, R2;
  bool IsXOROperand = true;
  if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
    IsXOROperand = false;
  } else {
    SDValue XOR = N0.getOperand(0);
    R1 = XOR.getOperand(0);
    R2 = XOR.getOperand(1);
  }

  unsigned HsAmt = N0.getConstantOperandVal(1);
  unsigned ShAmt = N1.getConstantOperandVal(1);

  SDValue Imm = CurDAG->getTargetConstant(
      ShAmt, DL, N0.getOperand(1).getValueType(), false);

  // The two shift amounts must together form a full element-width rotate.
  unsigned VTSizeInBits = VT.getScalarSizeInBits();
  if (ShAmt + HsAmt != VTSizeInBits)
    return false;

  if (!IsXOROperand) {
    // No XOR: synthesise a zero second operand with MOVI (v2d, #0).
    SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
    SDNode *MOV =
        CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
    SDValue MOVIV = SDValue(MOV, 0);

    R1 = N1->getOperand(0);
    R2 = MOVIV;
  }

  if (SVT != VT) {
    // Widen the operands into the container type SVT via INSERT_SUBREG,
    // going through the 128-bit Q type first for 64-bit vectors.
    SDValue Undef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);

    if (SVT.isScalableVector() && VT.is64BitVector()) {
      EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());

      SDValue UndefQ = SDValue(
          CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
      SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);

      R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
                                          UndefQ, R1, DSub),
                   0);
      if (R2.getValueType() == VT)
        R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
                                            UndefQ, R2, DSub),
                     0);
    }

    SDValue SubReg = CurDAG->getTargetConstant(
        (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);

    R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
                                        R1, SubReg),
                 0);

    if (SVT.isScalableVector() || R2.getValueType() != SVT)
      R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
                                          Undef, R2, SubReg),
                   0);
  }

  SDValue Ops[] = {R1, R2, Imm};
  SDNode *XAR = nullptr;

  if (SVT.isScalableVector()) {
    // NOTE(review): the opening line of this opcode-selection `if` appears to
    // be missing from this extract of the file -- confirm against full source.
            SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D}))
      XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
  } else {
    XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
  }

  assert(XAR && "Unexpected NULL value for XAR instruction in DAG");

  if (SVT != VT) {
    // Extract the result back down from the container type to VT.
    if (VT.is64BitVector() && SVT.isScalableVector()) {
      EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());

      SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
      SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
                                         SDValue(XAR, 0), ZSub);

      SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
      XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
                                   SDValue(Q, 0), DSub);
    } else {
      SDValue SubReg = CurDAG->getTargetConstant(
          (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
          MVT::i32);
      XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
                                   SDValue(XAR, 0), SubReg);
    }
  }
  ReplaceNode(N, XAR);
  return true;
}
4868
4869void AArch64DAGToDAGISel::Select(SDNode *Node) {
4870 // If we have a custom node, we already have selected!
4871 if (Node->isMachineOpcode()) {
4872 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4873 Node->setNodeId(-1);
4874 return;
4875 }
4876
4877 // Few custom selection stuff.
4878 EVT VT = Node->getValueType(0);
4879
4880 switch (Node->getOpcode()) {
4881 default:
4882 break;
4883
4885 if (SelectCMP_SWAP(Node))
4886 return;
4887 break;
4888
4889 case ISD::READ_REGISTER:
4890 case AArch64ISD::MRRS:
4891 if (tryReadRegister(Node))
4892 return;
4893 break;
4894
4896 case AArch64ISD::MSRR:
4897 if (tryWriteRegister(Node))
4898 return;
4899 break;
4900
4901 case ISD::LOAD: {
4902 // Try to select as an indexed load. Fall through to normal processing
4903 // if we can't.
4904 if (tryIndexedLoad(Node))
4905 return;
4906 break;
4907 }
4908
4909 case ISD::SRL:
4910 case ISD::AND:
4911 case ISD::SRA:
4913 if (tryBitfieldExtractOp(Node))
4914 return;
4915 if (tryBitfieldInsertInZeroOp(Node))
4916 return;
4917 [[fallthrough]];
4918 case ISD::ROTR:
4919 case ISD::SHL:
4920 if (tryShiftAmountMod(Node))
4921 return;
4922 break;
4923
4924 case ISD::SIGN_EXTEND:
4925 if (tryBitfieldExtractOpFromSExt(Node))
4926 return;
4927 break;
4928
4929 case ISD::OR:
4930 if (tryBitfieldInsertOp(Node))
4931 return;
4932 if (trySelectXAR(Node))
4933 return;
4934 break;
4935
4937 if (trySelectCastScalableToFixedLengthVector(Node))
4938 return;
4939 break;
4940 }
4941
4942 case ISD::INSERT_SUBVECTOR: {
4943 if (trySelectCastFixedLengthToScalableVector(Node))
4944 return;
4945 break;
4946 }
4947
4948 case ISD::Constant: {
4949 // Materialize zero constants as copies from WZR/XZR. This allows
4950 // the coalescer to propagate these into other instructions.
4951 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4952 if (ConstNode->isZero()) {
4953 if (VT == MVT::i32) {
4954 SDValue New = CurDAG->getCopyFromReg(
4955 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4956 ReplaceNode(Node, New.getNode());
4957 return;
4958 } else if (VT == MVT::i64) {
4959 SDValue New = CurDAG->getCopyFromReg(
4960 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4961 ReplaceNode(Node, New.getNode());
4962 return;
4963 }
4964 }
4965 break;
4966 }
4967
4968 case ISD::FrameIndex: {
4969 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4970 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4971 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4972 const TargetLowering *TLI = getTargetLowering();
4973 SDValue TFI = CurDAG->getTargetFrameIndex(
4974 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4975 SDLoc DL(Node);
4976 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4977 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4978 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4979 return;
4980 }
4982 unsigned IntNo = Node->getConstantOperandVal(1);
4983 switch (IntNo) {
4984 default:
4985 break;
4986 case Intrinsic::aarch64_gcsss: {
4987 SDLoc DL(Node);
4988 SDValue Chain = Node->getOperand(0);
4989 SDValue Val = Node->getOperand(2);
4990 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4991 SDNode *SS1 =
4992 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4993 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4994 MVT::Other, Zero, SDValue(SS1, 0));
4995 ReplaceNode(Node, SS2);
4996 return;
4997 }
4998 case Intrinsic::aarch64_ldaxp:
4999 case Intrinsic::aarch64_ldxp: {
5000 unsigned Op =
5001 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5002 SDValue MemAddr = Node->getOperand(2);
5003 SDLoc DL(Node);
5004 SDValue Chain = Node->getOperand(0);
5005
5006 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5007 MVT::Other, MemAddr, Chain);
5008
5009 // Transfer memoperands.
5010 MachineMemOperand *MemOp =
5011 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5012 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5013 ReplaceNode(Node, Ld);
5014 return;
5015 }
5016 case Intrinsic::aarch64_stlxp:
5017 case Intrinsic::aarch64_stxp: {
5018 unsigned Op =
5019 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5020 SDLoc DL(Node);
5021 SDValue Chain = Node->getOperand(0);
5022 SDValue ValLo = Node->getOperand(2);
5023 SDValue ValHi = Node->getOperand(3);
5024 SDValue MemAddr = Node->getOperand(4);
5025
5026 // Place arguments in the right order.
5027 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5028
5029 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5030 // Transfer memoperands.
5031 MachineMemOperand *MemOp =
5032 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5033 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5034
5035 ReplaceNode(Node, St);
5036 return;
5037 }
5038 case Intrinsic::aarch64_neon_ld1x2:
5039 if (VT == MVT::v8i8) {
5040 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5041 return;
5042 } else if (VT == MVT::v16i8) {
5043 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5044 return;
5045 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5046 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5047 return;
5048 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5049 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5050 return;
5051 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5052 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5053 return;
5054 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5055 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5056 return;
5057 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5058 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5059 return;
5060 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5061 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5062 return;
5063 }
5064 break;
5065 case Intrinsic::aarch64_neon_ld1x3:
5066 if (VT == MVT::v8i8) {
5067 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5068 return;
5069 } else if (VT == MVT::v16i8) {
5070 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5071 return;
5072 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5073 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5074 return;
5075 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5076 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5077 return;
5078 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5079 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5080 return;
5081 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5082 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5083 return;
5084 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5085 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5088 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5089 return;
5090 }
5091 break;
5092 case Intrinsic::aarch64_neon_ld1x4:
5093 if (VT == MVT::v8i8) {
5094 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5095 return;
5096 } else if (VT == MVT::v16i8) {
5097 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5098 return;
5099 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5100 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5101 return;
5102 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5103 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5104 return;
5105 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5106 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5107 return;
5108 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5109 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5110 return;
5111 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5112 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5113 return;
5114 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5115 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5116 return;
5117 }
5118 break;
5119 case Intrinsic::aarch64_neon_ld2:
5120 if (VT == MVT::v8i8) {
5121 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5122 return;
5123 } else if (VT == MVT::v16i8) {
5124 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5125 return;
5126 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5127 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5128 return;
5129 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5130 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5131 return;
5132 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5133 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5134 return;
5135 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5136 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5137 return;
5138 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5139 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5140 return;
5141 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5142 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5143 return;
5144 }
5145 break;
5146 case Intrinsic::aarch64_neon_ld3:
5147 if (VT == MVT::v8i8) {
5148 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5149 return;
5150 } else if (VT == MVT::v16i8) {
5151 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5152 return;
5153 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5154 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5155 return;
5156 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5157 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5158 return;
5159 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5160 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5161 return;
5162 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5163 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5164 return;
5165 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5166 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5167 return;
5168 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5169 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5170 return;
5171 }
5172 break;
5173 case Intrinsic::aarch64_neon_ld4:
5174 if (VT == MVT::v8i8) {
5175 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v16i8) {
5178 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5179 return;
5180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5181 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5182 return;
5183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5184 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5185 return;
5186 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5187 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5188 return;
5189 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5190 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5191 return;
5192 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5193 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5194 return;
5195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5196 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5197 return;
5198 }
5199 break;
5200 case Intrinsic::aarch64_neon_ld2r:
5201 if (VT == MVT::v8i8) {
5202 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5203 return;
5204 } else if (VT == MVT::v16i8) {
5205 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5206 return;
5207 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5208 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5209 return;
5210 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5211 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5212 return;
5213 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5214 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5215 return;
5216 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5217 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5218 return;
5219 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5220 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5221 return;
5222 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5223 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5224 return;
5225 }
5226 break;
5227 case Intrinsic::aarch64_neon_ld3r:
5228 if (VT == MVT::v8i8) {
5229 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5230 return;
5231 } else if (VT == MVT::v16i8) {
5232 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5233 return;
5234 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5235 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5236 return;
5237 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5238 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5239 return;
5240 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5241 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5242 return;
5243 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5244 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5245 return;
5246 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5247 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5248 return;
5249 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5250 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5251 return;
5252 }
5253 break;
5254 case Intrinsic::aarch64_neon_ld4r:
5255 if (VT == MVT::v8i8) {
5256 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5257 return;
5258 } else if (VT == MVT::v16i8) {
5259 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5260 return;
5261 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5262 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5263 return;
5264 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5265 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5266 return;
5267 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5268 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5269 return;
5270 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5271 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5272 return;
5273 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5274 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5275 return;
5276 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5277 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5278 return;
5279 }
5280 break;
5281 case Intrinsic::aarch64_neon_ld2lane:
5282 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5283 SelectLoadLane(Node, 2, AArch64::LD2i8);
5284 return;
5285 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5286 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5287 SelectLoadLane(Node, 2, AArch64::LD2i16);
5288 return;
5289 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5290 VT == MVT::v2f32) {
5291 SelectLoadLane(Node, 2, AArch64::LD2i32);
5292 return;
5293 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5294 VT == MVT::v1f64) {
5295 SelectLoadLane(Node, 2, AArch64::LD2i64);
5296 return;
5297 }
5298 break;
5299 case Intrinsic::aarch64_neon_ld3lane:
5300 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5301 SelectLoadLane(Node, 3, AArch64::LD3i8);
5302 return;
5303 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5304 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5305 SelectLoadLane(Node, 3, AArch64::LD3i16);
5306 return;
5307 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5308 VT == MVT::v2f32) {
5309 SelectLoadLane(Node, 3, AArch64::LD3i32);
5310 return;
5311 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5312 VT == MVT::v1f64) {
5313 SelectLoadLane(Node, 3, AArch64::LD3i64);
5314 return;
5315 }
5316 break;
5317 case Intrinsic::aarch64_neon_ld4lane:
5318 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5319 SelectLoadLane(Node, 4, AArch64::LD4i8);
5320 return;
5321 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5322 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5323 SelectLoadLane(Node, 4, AArch64::LD4i16);
5324 return;
5325 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5326 VT == MVT::v2f32) {
5327 SelectLoadLane(Node, 4, AArch64::LD4i32);
5328 return;
5329 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5330 VT == MVT::v1f64) {
5331 SelectLoadLane(Node, 4, AArch64::LD4i64);
5332 return;
5333 }
5334 break;
5335 case Intrinsic::aarch64_ld64b:
5336 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5337 return;
5338 case Intrinsic::aarch64_sve_ld2q_sret: {
5339 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5340 return;
5341 }
5342 case Intrinsic::aarch64_sve_ld3q_sret: {
5343 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5344 return;
5345 }
5346 case Intrinsic::aarch64_sve_ld4q_sret: {
5347 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5348 return;
5349 }
5350 case Intrinsic::aarch64_sve_ld2_sret: {
5351 if (VT == MVT::nxv16i8) {
5352 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5353 true);
5354 return;
5355 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5356 VT == MVT::nxv8bf16) {
5357 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5358 true);
5359 return;
5360 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5361 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5362 true);
5363 return;
5364 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5365 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5366 true);
5367 return;
5368 }
5369 break;
5370 }
5371 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5372 if (VT == MVT::nxv16i8) {
5373 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5374 SelectContiguousMultiVectorLoad(
5375 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5376 else if (Subtarget->hasSVE2p1())
5377 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5378 AArch64::LD1B_2Z);
5379 else
5380 break;
5381 return;
5382 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5383 VT == MVT::nxv8bf16) {
5384 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5385 SelectContiguousMultiVectorLoad(
5386 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5387 else if (Subtarget->hasSVE2p1())
5388 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5389 AArch64::LD1H_2Z);
5390 else
5391 break;
5392 return;
5393 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5394 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5395 SelectContiguousMultiVectorLoad(
5396 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5397 else if (Subtarget->hasSVE2p1())
5398 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5399 AArch64::LD1W_2Z);
5400 else
5401 break;
5402 return;
5403 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5404 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5405 SelectContiguousMultiVectorLoad(
5406 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5407 else if (Subtarget->hasSVE2p1())
5408 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5409 AArch64::LD1D_2Z);
5410 else
5411 break;
5412 return;
5413 }
5414 break;
5415 }
5416 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5417 if (VT == MVT::nxv16i8) {
5418 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5419 SelectContiguousMultiVectorLoad(
5420 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5421 else if (Subtarget->hasSVE2p1())
5422 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5423 AArch64::LD1B_4Z);
5424 else
5425 break;
5426 return;
5427 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5428 VT == MVT::nxv8bf16) {
5429 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5430 SelectContiguousMultiVectorLoad(
5431 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5432 else if (Subtarget->hasSVE2p1())
5433 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5434 AArch64::LD1H_4Z);
5435 else
5436 break;
5437 return;
5438 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5439 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5440 SelectContiguousMultiVectorLoad(
5441 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5442 else if (Subtarget->hasSVE2p1())
5443 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5444 AArch64::LD1W_4Z);
5445 else
5446 break;
5447 return;
5448 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5449 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5450 SelectContiguousMultiVectorLoad(
5451 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5452 else if (Subtarget->hasSVE2p1())
5453 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5454 AArch64::LD1D_4Z);
5455 else
5456 break;
5457 return;
5458 }
5459 break;
5460 }
5461 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5462 if (VT == MVT::nxv16i8) {
5463 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5464 SelectContiguousMultiVectorLoad(Node, 2, 0,
5465 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5466 AArch64::LDNT1B_2Z_PSEUDO);
5467 else if (Subtarget->hasSVE2p1())
5468 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5469 AArch64::LDNT1B_2Z);
5470 else
5471 break;
5472 return;
5473 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5474 VT == MVT::nxv8bf16) {
5475 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5476 SelectContiguousMultiVectorLoad(Node, 2, 1,
5477 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5478 AArch64::LDNT1H_2Z_PSEUDO);
5479 else if (Subtarget->hasSVE2p1())
5480 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5481 AArch64::LDNT1H_2Z);
5482 else
5483 break;
5484 return;
5485 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5486 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5487 SelectContiguousMultiVectorLoad(Node, 2, 2,
5488 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5489 AArch64::LDNT1W_2Z_PSEUDO);
5490 else if (Subtarget->hasSVE2p1())
5491 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5492 AArch64::LDNT1W_2Z);
5493 else
5494 break;
5495 return;
5496 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5497 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5498 SelectContiguousMultiVectorLoad(Node, 2, 3,
5499 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5500 AArch64::LDNT1D_2Z_PSEUDO);
5501 else if (Subtarget->hasSVE2p1())
5502 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5503 AArch64::LDNT1D_2Z);
5504 else
5505 break;
5506 return;
5507 }
5508 break;
5509 }
5510 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5511 if (VT == MVT::nxv16i8) {
5512 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5513 SelectContiguousMultiVectorLoad(Node, 4, 0,
5514 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5515 AArch64::LDNT1B_4Z_PSEUDO);
5516 else if (Subtarget->hasSVE2p1())
5517 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5518 AArch64::LDNT1B_4Z);
5519 else
5520 break;
5521 return;
5522 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5523 VT == MVT::nxv8bf16) {
5524 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5525 SelectContiguousMultiVectorLoad(Node, 4, 1,
5526 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5527 AArch64::LDNT1H_4Z_PSEUDO);
5528 else if (Subtarget->hasSVE2p1())
5529 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5530 AArch64::LDNT1H_4Z);
5531 else
5532 break;
5533 return;
5534 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5535 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5536 SelectContiguousMultiVectorLoad(Node, 4, 2,
5537 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5538 AArch64::LDNT1W_4Z_PSEUDO);
5539 else if (Subtarget->hasSVE2p1())
5540 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5541 AArch64::LDNT1W_4Z);
5542 else
5543 break;
5544 return;
5545 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5546 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5547 SelectContiguousMultiVectorLoad(Node, 4, 3,
5548 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5549 AArch64::LDNT1D_4Z_PSEUDO);
5550 else if (Subtarget->hasSVE2p1())
5551 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5552 AArch64::LDNT1D_4Z);
5553 else
5554 break;
5555 return;
5556 }
5557 break;
5558 }
5559 case Intrinsic::aarch64_sve_ld3_sret: {
5560 if (VT == MVT::nxv16i8) {
5561 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5562 true);
5563 return;
5564 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5565 VT == MVT::nxv8bf16) {
5566 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5567 true);
5568 return;
5569 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5570 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5571 true);
5572 return;
5573 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5574 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5575 true);
5576 return;
5577 }
5578 break;
5579 }
5580 case Intrinsic::aarch64_sve_ld4_sret: {
5581 if (VT == MVT::nxv16i8) {
5582 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5583 true);
5584 return;
5585 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5586 VT == MVT::nxv8bf16) {
5587 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5588 true);
5589 return;
5590 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5591 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5592 true);
5593 return;
5594 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5595 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5596 true);
5597 return;
5598 }
5599 break;
5600 }
5601 case Intrinsic::aarch64_sme_read_hor_vg2: {
5602 if (VT == MVT::nxv16i8) {
5603 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5604 AArch64::MOVA_2ZMXI_H_B);
5605 return;
5606 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5607 VT == MVT::nxv8bf16) {
5608 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5609 AArch64::MOVA_2ZMXI_H_H);
5610 return;
5611 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5612 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5613 AArch64::MOVA_2ZMXI_H_S);
5614 return;
5615 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5616 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5617 AArch64::MOVA_2ZMXI_H_D);
5618 return;
5619 }
5620 break;
5621 }
5622 case Intrinsic::aarch64_sme_read_ver_vg2: {
5623 if (VT == MVT::nxv16i8) {
5624 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5625 AArch64::MOVA_2ZMXI_V_B);
5626 return;
5627 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5628 VT == MVT::nxv8bf16) {
5629 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5630 AArch64::MOVA_2ZMXI_V_H);
5631 return;
5632 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5633 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5634 AArch64::MOVA_2ZMXI_V_S);
5635 return;
5636 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5637 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5638 AArch64::MOVA_2ZMXI_V_D);
5639 return;
5640 }
5641 break;
5642 }
5643 case Intrinsic::aarch64_sme_read_hor_vg4: {
5644 if (VT == MVT::nxv16i8) {
5645 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5646 AArch64::MOVA_4ZMXI_H_B);
5647 return;
5648 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5649 VT == MVT::nxv8bf16) {
5650 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5651 AArch64::MOVA_4ZMXI_H_H);
5652 return;
5653 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5654 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5655 AArch64::MOVA_4ZMXI_H_S);
5656 return;
5657 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5658 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5659 AArch64::MOVA_4ZMXI_H_D);
5660 return;
5661 }
5662 break;
5663 }
5664 case Intrinsic::aarch64_sme_read_ver_vg4: {
5665 if (VT == MVT::nxv16i8) {
5666 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5667 AArch64::MOVA_4ZMXI_V_B);
5668 return;
5669 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5670 VT == MVT::nxv8bf16) {
5671 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5672 AArch64::MOVA_4ZMXI_V_H);
5673 return;
5674 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5675 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5676 AArch64::MOVA_4ZMXI_V_S);
5677 return;
5678 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5679 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5680 AArch64::MOVA_4ZMXI_V_D);
5681 return;
5682 }
5683 break;
5684 }
5685 case Intrinsic::aarch64_sme_read_vg1x2: {
5686 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5687 AArch64::MOVA_VG2_2ZMXI);
5688 return;
5689 }
5690 case Intrinsic::aarch64_sme_read_vg1x4: {
5691 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5692 AArch64::MOVA_VG4_4ZMXI);
5693 return;
5694 }
5695 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5696 if (VT == MVT::nxv16i8) {
5697 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5698 return;
5699 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5700 VT == MVT::nxv8bf16) {
5701 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5702 return;
5703 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5704 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5705 return;
5706 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5707 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5708 return;
5709 }
5710 break;
5711 }
5712 case Intrinsic::aarch64_sme_readz_vert_x2: {
5713 if (VT == MVT::nxv16i8) {
5714 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5715 return;
5716 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5717 VT == MVT::nxv8bf16) {
5718 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5719 return;
5720 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5721 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5722 return;
5723 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5724 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5725 return;
5726 }
5727 break;
5728 }
5729 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5730 if (VT == MVT::nxv16i8) {
5731 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5732 return;
5733 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5734 VT == MVT::nxv8bf16) {
5735 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5736 return;
5737 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5738 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5739 return;
5740 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5741 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5742 return;
5743 }
5744 break;
5745 }
5746 case Intrinsic::aarch64_sme_readz_vert_x4: {
5747 if (VT == MVT::nxv16i8) {
5748 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5749 return;
5750 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5751 VT == MVT::nxv8bf16) {
5752 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5753 return;
5754 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5755 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5756 return;
5757 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5758 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5759 return;
5760 }
5761 break;
5762 }
5763 case Intrinsic::aarch64_sme_readz_x2: {
5764 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5765 AArch64::ZA);
5766 return;
5767 }
5768 case Intrinsic::aarch64_sme_readz_x4: {
5769 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5770 AArch64::ZA);
5771 return;
5772 }
5773 case Intrinsic::swift_async_context_addr: {
5774 SDLoc DL(Node);
5775 SDValue Chain = Node->getOperand(0);
5776 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5777 SDValue Res = SDValue(
5778 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5779 CurDAG->getTargetConstant(8, DL, MVT::i32),
5780 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5781 0);
5782 ReplaceUses(SDValue(Node, 0), Res);
5783 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5784 CurDAG->RemoveDeadNode(Node);
5785
5786 auto &MF = CurDAG->getMachineFunction();
5787 MF.getFrameInfo().setFrameAddressIsTaken(true);
5788 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5789 return;
5790 }
5791 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5793 Node->getValueType(0),
5794 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5795 AArch64::LUTI2_4ZTZI_S}))
5796 // Second Immediate must be <= 3:
5797 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5798 return;
5799 }
5800 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5802 Node->getValueType(0),
5803 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5804 // Second Immediate must be <= 1:
5805 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5806 return;
5807 }
5808 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5810 Node->getValueType(0),
5811 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5812 AArch64::LUTI2_2ZTZI_S}))
5813 // Second Immediate must be <= 7:
5814 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5815 return;
5816 }
5817 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5819 Node->getValueType(0),
5820 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5821 AArch64::LUTI4_2ZTZI_S}))
5822 // Second Immediate must be <= 3:
5823 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5824 return;
5825 }
5826 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5827 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5828 return;
5829 }
5830 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5832 Node->getValueType(0),
5833 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5834 SelectCVTIntrinsicFP8(Node, 2, Opc);
5835 return;
5836 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5838 Node->getValueType(0),
5839 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5840 SelectCVTIntrinsicFP8(Node, 2, Opc);
5841 return;
5842 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5844 Node->getValueType(0),
5845 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5846 SelectCVTIntrinsicFP8(Node, 2, Opc);
5847 return;
5848 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5850 Node->getValueType(0),
5851 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5852 SelectCVTIntrinsicFP8(Node, 2, Opc);
5853 return;
5854 case Intrinsic::ptrauth_resign_load_relative:
5855 SelectPtrauthResign(Node);
5856 return;
5857 }
5858 } break;
5860 unsigned IntNo = Node->getConstantOperandVal(0);
5861 switch (IntNo) {
5862 default:
5863 break;
5864 case Intrinsic::aarch64_tagp:
5865 SelectTagP(Node);
5866 return;
5867
5868 case Intrinsic::ptrauth_auth:
5869 SelectPtrauthAuth(Node);
5870 return;
5871
5872 case Intrinsic::ptrauth_resign:
5873 SelectPtrauthResign(Node);
5874 return;
5875
5876 case Intrinsic::aarch64_neon_tbl2:
5877 SelectTable(Node, 2,
5878 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5879 false);
5880 return;
5881 case Intrinsic::aarch64_neon_tbl3:
5882 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5883 : AArch64::TBLv16i8Three,
5884 false);
5885 return;
5886 case Intrinsic::aarch64_neon_tbl4:
5887 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5888 : AArch64::TBLv16i8Four,
5889 false);
5890 return;
5891 case Intrinsic::aarch64_neon_tbx2:
5892 SelectTable(Node, 2,
5893 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5894 true);
5895 return;
5896 case Intrinsic::aarch64_neon_tbx3:
5897 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5898 : AArch64::TBXv16i8Three,
5899 true);
5900 return;
5901 case Intrinsic::aarch64_neon_tbx4:
5902 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5903 : AArch64::TBXv16i8Four,
5904 true);
5905 return;
5906 case Intrinsic::aarch64_sve_srshl_single_x2:
5908 Node->getValueType(0),
5909 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5910 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5911 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5912 return;
5913 case Intrinsic::aarch64_sve_srshl_single_x4:
5915 Node->getValueType(0),
5916 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5917 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5918 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5919 return;
5920 case Intrinsic::aarch64_sve_urshl_single_x2:
5922 Node->getValueType(0),
5923 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5924 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5925 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5926 return;
5927 case Intrinsic::aarch64_sve_urshl_single_x4:
5929 Node->getValueType(0),
5930 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5931 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5932 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5933 return;
5934 case Intrinsic::aarch64_sve_srshl_x2:
5936 Node->getValueType(0),
5937 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5938 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5939 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5940 return;
5941 case Intrinsic::aarch64_sve_srshl_x4:
5943 Node->getValueType(0),
5944 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5945 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5946 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5947 return;
5948 case Intrinsic::aarch64_sve_urshl_x2:
5950 Node->getValueType(0),
5951 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5952 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5953 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5954 return;
5955 case Intrinsic::aarch64_sve_urshl_x4:
5957 Node->getValueType(0),
5958 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5959 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5960 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5961 return;
5962 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5964 Node->getValueType(0),
5965 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5966 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5967 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5968 return;
5969 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5971 Node->getValueType(0),
5972 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5973 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5974 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5975 return;
5976 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5978 Node->getValueType(0),
5979 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5980 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5981 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5982 return;
5983 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5985 Node->getValueType(0),
5986 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5987 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5988 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5989 return;
5990 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5992 Node->getValueType(0),
5993 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5994 AArch64::FSCALE_2ZZ_D}))
5995 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5996 return;
5997 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5999 Node->getValueType(0),
6000 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6001 AArch64::FSCALE_4ZZ_D}))
6002 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6003 return;
6004 case Intrinsic::aarch64_sme_fp8_scale_x2:
6006 Node->getValueType(0),
6007 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6008 AArch64::FSCALE_2Z2Z_D}))
6009 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6010 return;
6011 case Intrinsic::aarch64_sme_fp8_scale_x4:
6013 Node->getValueType(0),
6014 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6015 AArch64::FSCALE_4Z4Z_D}))
6016 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6017 return;
6018 case Intrinsic::aarch64_sve_whilege_x2:
6020 Node->getValueType(0),
6021 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6022 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6023 SelectWhilePair(Node, Op);
6024 return;
6025 case Intrinsic::aarch64_sve_whilegt_x2:
6027 Node->getValueType(0),
6028 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6029 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6030 SelectWhilePair(Node, Op);
6031 return;
6032 case Intrinsic::aarch64_sve_whilehi_x2:
6034 Node->getValueType(0),
6035 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6036 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6037 SelectWhilePair(Node, Op);
6038 return;
6039 case Intrinsic::aarch64_sve_whilehs_x2:
6041 Node->getValueType(0),
6042 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6043 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6044 SelectWhilePair(Node, Op);
6045 return;
6046 case Intrinsic::aarch64_sve_whilele_x2:
6048 Node->getValueType(0),
6049 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6050 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6051 SelectWhilePair(Node, Op);
6052 return;
6053 case Intrinsic::aarch64_sve_whilelo_x2:
6055 Node->getValueType(0),
6056 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6057 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6058 SelectWhilePair(Node, Op);
6059 return;
6060 case Intrinsic::aarch64_sve_whilels_x2:
6062 Node->getValueType(0),
6063 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6064 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6065 SelectWhilePair(Node, Op);
6066 return;
6067 case Intrinsic::aarch64_sve_whilelt_x2:
6069 Node->getValueType(0),
6070 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6071 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6072 SelectWhilePair(Node, Op);
6073 return;
6074 case Intrinsic::aarch64_sve_smax_single_x2:
6076 Node->getValueType(0),
6077 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6078 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6079 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6080 return;
6081 case Intrinsic::aarch64_sve_umax_single_x2:
6083 Node->getValueType(0),
6084 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6085 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6086 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6087 return;
6088 case Intrinsic::aarch64_sve_fmax_single_x2:
6090 Node->getValueType(0),
6091 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6092 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6093 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6094 return;
6095 case Intrinsic::aarch64_sve_smax_single_x4:
6097 Node->getValueType(0),
6098 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6099 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6100 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6101 return;
6102 case Intrinsic::aarch64_sve_umax_single_x4:
6104 Node->getValueType(0),
6105 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6106 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6107 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6108 return;
6109 case Intrinsic::aarch64_sve_fmax_single_x4:
6111 Node->getValueType(0),
6112 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6113 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6114 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6115 return;
6116 case Intrinsic::aarch64_sve_smin_single_x2:
6118 Node->getValueType(0),
6119 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6120 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6121 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6122 return;
6123 case Intrinsic::aarch64_sve_umin_single_x2:
6125 Node->getValueType(0),
6126 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6127 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6128 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6129 return;
6130 case Intrinsic::aarch64_sve_fmin_single_x2:
6132 Node->getValueType(0),
6133 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6134 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6135 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6136 return;
6137 case Intrinsic::aarch64_sve_smin_single_x4:
6139 Node->getValueType(0),
6140 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6141 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6142 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6143 return;
6144 case Intrinsic::aarch64_sve_umin_single_x4:
6146 Node->getValueType(0),
6147 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6148 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6149 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6150 return;
6151 case Intrinsic::aarch64_sve_fmin_single_x4:
6153 Node->getValueType(0),
6154 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6155 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6156 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6157 return;
6158 case Intrinsic::aarch64_sve_smax_x2:
6160 Node->getValueType(0),
6161 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6162 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6163 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6164 return;
6165 case Intrinsic::aarch64_sve_umax_x2:
6167 Node->getValueType(0),
6168 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6169 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6170 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6171 return;
6172 case Intrinsic::aarch64_sve_fmax_x2:
6174 Node->getValueType(0),
6175 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6176 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6177 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6178 return;
6179 case Intrinsic::aarch64_sve_smax_x4:
6181 Node->getValueType(0),
6182 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6183 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6184 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6185 return;
6186 case Intrinsic::aarch64_sve_umax_x4:
6188 Node->getValueType(0),
6189 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6190 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6191 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6192 return;
6193 case Intrinsic::aarch64_sve_fmax_x4:
6195 Node->getValueType(0),
6196 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6197 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6198 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6199 return;
6200 case Intrinsic::aarch64_sme_famax_x2:
6202 Node->getValueType(0),
6203 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6204 AArch64::FAMAX_2Z2Z_D}))
6205 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6206 return;
6207 case Intrinsic::aarch64_sme_famax_x4:
6209 Node->getValueType(0),
6210 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6211 AArch64::FAMAX_4Z4Z_D}))
6212 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6213 return;
6214 case Intrinsic::aarch64_sme_famin_x2:
6216 Node->getValueType(0),
6217 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6218 AArch64::FAMIN_2Z2Z_D}))
6219 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6220 return;
6221 case Intrinsic::aarch64_sme_famin_x4:
6223 Node->getValueType(0),
6224 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6225 AArch64::FAMIN_4Z4Z_D}))
6226 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6227 return;
6228 case Intrinsic::aarch64_sve_smin_x2:
6230 Node->getValueType(0),
6231 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6232 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6233 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6234 return;
6235 case Intrinsic::aarch64_sve_umin_x2:
6237 Node->getValueType(0),
6238 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6239 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6240 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6241 return;
6242 case Intrinsic::aarch64_sve_fmin_x2:
6244 Node->getValueType(0),
6245 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6246 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6247 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6248 return;
6249 case Intrinsic::aarch64_sve_smin_x4:
6251 Node->getValueType(0),
6252 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6253 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6254 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6255 return;
6256 case Intrinsic::aarch64_sve_umin_x4:
6258 Node->getValueType(0),
6259 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6260 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6261 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6262 return;
6263 case Intrinsic::aarch64_sve_fmin_x4:
6265 Node->getValueType(0),
6266 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6267 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6268 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6269 return;
6270 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6272 Node->getValueType(0),
6273 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6274 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6275 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6276 return;
6277 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6279 Node->getValueType(0),
6280 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6281 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6282 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6283 return;
6284 case Intrinsic::aarch64_sve_fminnm_single_x2:
6286 Node->getValueType(0),
6287 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6288 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6289 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6290 return;
6291 case Intrinsic::aarch64_sve_fminnm_single_x4:
6293 Node->getValueType(0),
6294 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6295 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6296 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6297 return;
6298 case Intrinsic::aarch64_sve_fscale_single_x4:
6299 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6300 return;
6301 case Intrinsic::aarch64_sve_fscale_single_x2:
6302 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6303 return;
6304 case Intrinsic::aarch64_sve_fmul_single_x4:
6306 Node->getValueType(0),
6307 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6308 AArch64::FMUL_4ZZ_D}))
6309 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6310 return;
6311 case Intrinsic::aarch64_sve_fmul_single_x2:
6313 Node->getValueType(0),
6314 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6315 AArch64::FMUL_2ZZ_D}))
6316 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6317 return;
6318 case Intrinsic::aarch64_sve_fmaxnm_x2:
6320 Node->getValueType(0),
6321 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6322 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6323 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6324 return;
6325 case Intrinsic::aarch64_sve_fmaxnm_x4:
6327 Node->getValueType(0),
6328 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6329 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6330 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6331 return;
6332 case Intrinsic::aarch64_sve_fminnm_x2:
6334 Node->getValueType(0),
6335 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6336 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6337 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6338 return;
6339 case Intrinsic::aarch64_sve_fminnm_x4:
6341 Node->getValueType(0),
6342 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6343 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6344 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6345 return;
6346 case Intrinsic::aarch64_sve_aese_lane_x2:
6347 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6348 return;
6349 case Intrinsic::aarch64_sve_aesd_lane_x2:
6350 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6351 return;
6352 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6353 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6354 return;
6355 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6356 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6357 return;
6358 case Intrinsic::aarch64_sve_aese_lane_x4:
6359 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6360 return;
6361 case Intrinsic::aarch64_sve_aesd_lane_x4:
6362 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6363 return;
6364 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6365 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6366 return;
6367 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6368 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6369 return;
6370 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6371 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6372 return;
6373 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6374 SDLoc DL(Node);
6375 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6376 SDNode *Res =
6377 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6378 SDValue SuperReg = SDValue(Res, 0);
6379 for (unsigned I = 0; I < 2; I++)
6380 ReplaceUses(SDValue(Node, I),
6381 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6382 SuperReg));
6383 CurDAG->RemoveDeadNode(Node);
6384 return;
6385 }
6386 case Intrinsic::aarch64_sve_fscale_x4:
6387 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6388 return;
6389 case Intrinsic::aarch64_sve_fscale_x2:
6390 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6391 return;
6392 case Intrinsic::aarch64_sve_fmul_x4:
6394 Node->getValueType(0),
6395 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6396 AArch64::FMUL_4Z4Z_D}))
6397 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6398 return;
6399 case Intrinsic::aarch64_sve_fmul_x2:
6401 Node->getValueType(0),
6402 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6403 AArch64::FMUL_2Z2Z_D}))
6404 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6405 return;
6406 case Intrinsic::aarch64_sve_fcvtzs_x2:
6407 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6408 return;
6409 case Intrinsic::aarch64_sve_scvtf_x2:
6410 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6411 return;
6412 case Intrinsic::aarch64_sve_fcvtzu_x2:
6413 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6414 return;
6415 case Intrinsic::aarch64_sve_ucvtf_x2:
6416 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6417 return;
6418 case Intrinsic::aarch64_sve_fcvtzs_x4:
6419 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6420 return;
6421 case Intrinsic::aarch64_sve_scvtf_x4:
6422 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6423 return;
6424 case Intrinsic::aarch64_sve_fcvtzu_x4:
6425 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6426 return;
6427 case Intrinsic::aarch64_sve_ucvtf_x4:
6428 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6429 return;
6430 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6431 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6432 return;
6433 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6434 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6435 return;
6436 case Intrinsic::aarch64_sve_sclamp_single_x2:
6438 Node->getValueType(0),
6439 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6440 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6441 SelectClamp(Node, 2, Op);
6442 return;
6443 case Intrinsic::aarch64_sve_uclamp_single_x2:
6445 Node->getValueType(0),
6446 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6447 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6448 SelectClamp(Node, 2, Op);
6449 return;
6450 case Intrinsic::aarch64_sve_fclamp_single_x2:
6452 Node->getValueType(0),
6453 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6454 AArch64::FCLAMP_VG2_2Z2Z_D}))
6455 SelectClamp(Node, 2, Op);
6456 return;
6457 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6458 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6459 return;
6460 case Intrinsic::aarch64_sve_sclamp_single_x4:
6462 Node->getValueType(0),
6463 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6464 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6465 SelectClamp(Node, 4, Op);
6466 return;
6467 case Intrinsic::aarch64_sve_uclamp_single_x4:
6469 Node->getValueType(0),
6470 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6471 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6472 SelectClamp(Node, 4, Op);
6473 return;
6474 case Intrinsic::aarch64_sve_fclamp_single_x4:
6476 Node->getValueType(0),
6477 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6478 AArch64::FCLAMP_VG4_4Z4Z_D}))
6479 SelectClamp(Node, 4, Op);
6480 return;
6481 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6482 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6483 return;
6484 case Intrinsic::aarch64_sve_add_single_x2:
6486 Node->getValueType(0),
6487 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6488 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6489 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6490 return;
6491 case Intrinsic::aarch64_sve_add_single_x4:
6493 Node->getValueType(0),
6494 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6495 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6496 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6497 return;
6498 case Intrinsic::aarch64_sve_zip_x2:
6500 Node->getValueType(0),
6501 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6502 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6503 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6504 return;
6505 case Intrinsic::aarch64_sve_zipq_x2:
6506 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6507 AArch64::ZIP_VG2_2ZZZ_Q);
6508 return;
6509 case Intrinsic::aarch64_sve_zip_x4:
6511 Node->getValueType(0),
6512 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6513 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6514 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6515 return;
6516 case Intrinsic::aarch64_sve_zipq_x4:
6517 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6518 AArch64::ZIP_VG4_4Z4Z_Q);
6519 return;
6520 case Intrinsic::aarch64_sve_uzp_x2:
6522 Node->getValueType(0),
6523 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6524 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6525 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6526 return;
6527 case Intrinsic::aarch64_sve_uzpq_x2:
6528 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6529 AArch64::UZP_VG2_2ZZZ_Q);
6530 return;
6531 case Intrinsic::aarch64_sve_uzp_x4:
6533 Node->getValueType(0),
6534 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6535 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6536 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6537 return;
6538 case Intrinsic::aarch64_sve_uzpq_x4:
6539 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6540 AArch64::UZP_VG4_4Z4Z_Q);
6541 return;
6542 case Intrinsic::aarch64_sve_sel_x2:
6544 Node->getValueType(0),
6545 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6546 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6547 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6548 return;
6549 case Intrinsic::aarch64_sve_sel_x4:
6551 Node->getValueType(0),
6552 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6553 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6554 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6555 return;
6556 case Intrinsic::aarch64_sve_frinta_x2:
6557 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6558 return;
6559 case Intrinsic::aarch64_sve_frinta_x4:
6560 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6561 return;
6562 case Intrinsic::aarch64_sve_frintm_x2:
6563 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6564 return;
6565 case Intrinsic::aarch64_sve_frintm_x4:
6566 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6567 return;
6568 case Intrinsic::aarch64_sve_frintn_x2:
6569 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6570 return;
6571 case Intrinsic::aarch64_sve_frintn_x4:
6572 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6573 return;
6574 case Intrinsic::aarch64_sve_frintp_x2:
6575 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6576 return;
6577 case Intrinsic::aarch64_sve_frintp_x4:
6578 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6579 return;
6580 case Intrinsic::aarch64_sve_sunpk_x2:
6582 Node->getValueType(0),
6583 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6584 AArch64::SUNPK_VG2_2ZZ_D}))
6585 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6586 return;
6587 case Intrinsic::aarch64_sve_uunpk_x2:
6589 Node->getValueType(0),
6590 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6591 AArch64::UUNPK_VG2_2ZZ_D}))
6592 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6593 return;
6594 case Intrinsic::aarch64_sve_sunpk_x4:
6596 Node->getValueType(0),
6597 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6598 AArch64::SUNPK_VG4_4Z2Z_D}))
6599 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6600 return;
6601 case Intrinsic::aarch64_sve_uunpk_x4:
6603 Node->getValueType(0),
6604 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6605 AArch64::UUNPK_VG4_4Z2Z_D}))
6606 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6607 return;
6608 case Intrinsic::aarch64_sve_pext_x2: {
6610 Node->getValueType(0),
6611 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6612 AArch64::PEXT_2PCI_D}))
6613 SelectPExtPair(Node, Op);
6614 return;
6615 }
6616 }
6617 break;
6618 }
6619 case ISD::INTRINSIC_VOID: {
6620 unsigned IntNo = Node->getConstantOperandVal(1);
6621 if (Node->getNumOperands() >= 3)
6622 VT = Node->getOperand(2)->getValueType(0);
6623 switch (IntNo) {
6624 default:
6625 break;
6626 case Intrinsic::aarch64_neon_st1x2: {
6627 if (VT == MVT::v8i8) {
6628 SelectStore(Node, 2, AArch64::ST1Twov8b);
6629 return;
6630 } else if (VT == MVT::v16i8) {
6631 SelectStore(Node, 2, AArch64::ST1Twov16b);
6632 return;
6633 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6634 VT == MVT::v4bf16) {
6635 SelectStore(Node, 2, AArch64::ST1Twov4h);
6636 return;
6637 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6638 VT == MVT::v8bf16) {
6639 SelectStore(Node, 2, AArch64::ST1Twov8h);
6640 return;
6641 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6642 SelectStore(Node, 2, AArch64::ST1Twov2s);
6643 return;
6644 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6645 SelectStore(Node, 2, AArch64::ST1Twov4s);
6646 return;
6647 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6648 SelectStore(Node, 2, AArch64::ST1Twov2d);
6649 return;
6650 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6651 SelectStore(Node, 2, AArch64::ST1Twov1d);
6652 return;
6653 }
6654 break;
6655 }
6656 case Intrinsic::aarch64_neon_st1x3: {
6657 if (VT == MVT::v8i8) {
6658 SelectStore(Node, 3, AArch64::ST1Threev8b);
6659 return;
6660 } else if (VT == MVT::v16i8) {
6661 SelectStore(Node, 3, AArch64::ST1Threev16b);
6662 return;
6663 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6664 VT == MVT::v4bf16) {
6665 SelectStore(Node, 3, AArch64::ST1Threev4h);
6666 return;
6667 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6668 VT == MVT::v8bf16) {
6669 SelectStore(Node, 3, AArch64::ST1Threev8h);
6670 return;
6671 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6672 SelectStore(Node, 3, AArch64::ST1Threev2s);
6673 return;
6674 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6675 SelectStore(Node, 3, AArch64::ST1Threev4s);
6676 return;
6677 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6678 SelectStore(Node, 3, AArch64::ST1Threev2d);
6679 return;
6680 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6681 SelectStore(Node, 3, AArch64::ST1Threev1d);
6682 return;
6683 }
6684 break;
6685 }
6686 case Intrinsic::aarch64_neon_st1x4: {
6687 if (VT == MVT::v8i8) {
6688 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6689 return;
6690 } else if (VT == MVT::v16i8) {
6691 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6692 return;
6693 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6694 VT == MVT::v4bf16) {
6695 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6696 return;
6697 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6698 VT == MVT::v8bf16) {
6699 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6700 return;
6701 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6702 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6703 return;
6704 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6705 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6706 return;
6707 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6708 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6709 return;
6710 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6711 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6712 return;
6713 }
6714 break;
6715 }
6716 case Intrinsic::aarch64_neon_st2: {
6717 if (VT == MVT::v8i8) {
6718 SelectStore(Node, 2, AArch64::ST2Twov8b);
6719 return;
6720 } else if (VT == MVT::v16i8) {
6721 SelectStore(Node, 2, AArch64::ST2Twov16b);
6722 return;
6723 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6724 VT == MVT::v4bf16) {
6725 SelectStore(Node, 2, AArch64::ST2Twov4h);
6726 return;
6727 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6728 VT == MVT::v8bf16) {
6729 SelectStore(Node, 2, AArch64::ST2Twov8h);
6730 return;
6731 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6732 SelectStore(Node, 2, AArch64::ST2Twov2s);
6733 return;
6734 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6735 SelectStore(Node, 2, AArch64::ST2Twov4s);
6736 return;
6737 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6738 SelectStore(Node, 2, AArch64::ST2Twov2d);
6739 return;
6740 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6741 SelectStore(Node, 2, AArch64::ST1Twov1d);
6742 return;
6743 }
6744 break;
6745 }
6746 case Intrinsic::aarch64_neon_st3: {
6747 if (VT == MVT::v8i8) {
6748 SelectStore(Node, 3, AArch64::ST3Threev8b);
6749 return;
6750 } else if (VT == MVT::v16i8) {
6751 SelectStore(Node, 3, AArch64::ST3Threev16b);
6752 return;
6753 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6754 VT == MVT::v4bf16) {
6755 SelectStore(Node, 3, AArch64::ST3Threev4h);
6756 return;
6757 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6758 VT == MVT::v8bf16) {
6759 SelectStore(Node, 3, AArch64::ST3Threev8h);
6760 return;
6761 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6762 SelectStore(Node, 3, AArch64::ST3Threev2s);
6763 return;
6764 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6765 SelectStore(Node, 3, AArch64::ST3Threev4s);
6766 return;
6767 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6768 SelectStore(Node, 3, AArch64::ST3Threev2d);
6769 return;
6770 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6771 SelectStore(Node, 3, AArch64::ST1Threev1d);
6772 return;
6773 }
6774 break;
6775 }
6776 case Intrinsic::aarch64_neon_st4: {
6777 if (VT == MVT::v8i8) {
6778 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6779 return;
6780 } else if (VT == MVT::v16i8) {
6781 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6782 return;
6783 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6784 VT == MVT::v4bf16) {
6785 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6786 return;
6787 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6788 VT == MVT::v8bf16) {
6789 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6790 return;
6791 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6792 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6793 return;
6794 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6795 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6796 return;
6797 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6798 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6799 return;
6800 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6801 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6802 return;
6803 }
6804 break;
6805 }
6806 case Intrinsic::aarch64_neon_st2lane: {
6807 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6808 SelectStoreLane(Node, 2, AArch64::ST2i8);
6809 return;
6810 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6811 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6812 SelectStoreLane(Node, 2, AArch64::ST2i16);
6813 return;
6814 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6815 VT == MVT::v2f32) {
6816 SelectStoreLane(Node, 2, AArch64::ST2i32);
6817 return;
6818 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6819 VT == MVT::v1f64) {
6820 SelectStoreLane(Node, 2, AArch64::ST2i64);
6821 return;
6822 }
6823 break;
6824 }
6825 case Intrinsic::aarch64_neon_st3lane: {
6826 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6827 SelectStoreLane(Node, 3, AArch64::ST3i8);
6828 return;
6829 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6830 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6831 SelectStoreLane(Node, 3, AArch64::ST3i16);
6832 return;
6833 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6834 VT == MVT::v2f32) {
6835 SelectStoreLane(Node, 3, AArch64::ST3i32);
6836 return;
6837 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6838 VT == MVT::v1f64) {
6839 SelectStoreLane(Node, 3, AArch64::ST3i64);
6840 return;
6841 }
6842 break;
6843 }
6844 case Intrinsic::aarch64_neon_st4lane: {
6845 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6846 SelectStoreLane(Node, 4, AArch64::ST4i8);
6847 return;
6848 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6849 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6850 SelectStoreLane(Node, 4, AArch64::ST4i16);
6851 return;
6852 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6853 VT == MVT::v2f32) {
6854 SelectStoreLane(Node, 4, AArch64::ST4i32);
6855 return;
6856 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6857 VT == MVT::v1f64) {
6858 SelectStoreLane(Node, 4, AArch64::ST4i64);
6859 return;
6860 }
6861 break;
6862 }
6863 case Intrinsic::aarch64_sve_st2q: {
6864 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6865 return;
6866 }
6867 case Intrinsic::aarch64_sve_st3q: {
6868 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6869 return;
6870 }
6871 case Intrinsic::aarch64_sve_st4q: {
6872 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6873 return;
6874 }
6875 case Intrinsic::aarch64_sve_st2: {
6876 if (VT == MVT::nxv16i8) {
6877 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6878 return;
6879 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6880 VT == MVT::nxv8bf16) {
6881 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6882 return;
6883 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6884 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6885 return;
6886 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6887 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6888 return;
6889 }
6890 break;
6891 }
6892 case Intrinsic::aarch64_sve_st3: {
6893 if (VT == MVT::nxv16i8) {
6894 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6895 return;
6896 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6897 VT == MVT::nxv8bf16) {
6898 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6899 return;
6900 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6901 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6902 return;
6903 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6904 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6905 return;
6906 }
6907 break;
6908 }
6909 case Intrinsic::aarch64_sve_st4: {
6910 if (VT == MVT::nxv16i8) {
6911 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6912 return;
6913 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6914 VT == MVT::nxv8bf16) {
6915 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6916 return;
6917 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6918 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6919 return;
6920 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6921 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6922 return;
6923 }
6924 break;
6925 }
6926 }
6927 break;
6928 }
6929 case AArch64ISD::LD2post: {
6930 if (VT == MVT::v8i8) {
6931 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6932 return;
6933 } else if (VT == MVT::v16i8) {
6934 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6935 return;
6936 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6937 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6938 return;
6939 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6940 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6941 return;
6942 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6943 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6944 return;
6945 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6946 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6947 return;
6948 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6949 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6950 return;
6951 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6952 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6953 return;
6954 }
6955 break;
6956 }
6957 case AArch64ISD::LD3post: {
6958 if (VT == MVT::v8i8) {
6959 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6960 return;
6961 } else if (VT == MVT::v16i8) {
6962 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6963 return;
6964 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6965 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6966 return;
6967 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6968 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6969 return;
6970 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6971 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6972 return;
6973 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6974 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6975 return;
6976 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6977 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6978 return;
6979 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6980 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6981 return;
6982 }
6983 break;
6984 }
6985 case AArch64ISD::LD4post: {
6986 if (VT == MVT::v8i8) {
6987 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6988 return;
6989 } else if (VT == MVT::v16i8) {
6990 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6991 return;
6992 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6993 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6994 return;
6995 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6996 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6997 return;
6998 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6999 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7000 return;
7001 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7002 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7003 return;
7004 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7005 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7006 return;
7007 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7008 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7009 return;
7010 }
7011 break;
7012 }
7013 case AArch64ISD::LD1x2post: {
7014 if (VT == MVT::v8i8) {
7015 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7016 return;
7017 } else if (VT == MVT::v16i8) {
7018 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7019 return;
7020 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7021 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7022 return;
7023 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7024 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7025 return;
7026 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7027 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7028 return;
7029 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7030 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7031 return;
7032 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7033 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7034 return;
7035 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7036 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7037 return;
7038 }
7039 break;
7040 }
7041 case AArch64ISD::LD1x3post: {
7042 if (VT == MVT::v8i8) {
7043 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7044 return;
7045 } else if (VT == MVT::v16i8) {
7046 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7047 return;
7048 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7049 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7050 return;
7051 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7052 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7053 return;
7054 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7055 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7056 return;
7057 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7058 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7059 return;
7060 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7061 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7062 return;
7063 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7064 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7065 return;
7066 }
7067 break;
7068 }
7069 case AArch64ISD::LD1x4post: {
7070 if (VT == MVT::v8i8) {
7071 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7072 return;
7073 } else if (VT == MVT::v16i8) {
7074 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7075 return;
7076 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7077 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7078 return;
7079 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7080 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7081 return;
7082 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7083 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7084 return;
7085 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7086 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7087 return;
7088 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7089 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7090 return;
7091 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7092 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7093 return;
7094 }
7095 break;
7096 }
7097 case AArch64ISD::LD1DUPpost: {
7098 if (VT == MVT::v8i8) {
7099 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7100 return;
7101 } else if (VT == MVT::v16i8) {
7102 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7103 return;
7104 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7105 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7106 return;
7107 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7108 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7109 return;
7110 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7111 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7112 return;
7113 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7114 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7115 return;
7116 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7117 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7118 return;
7119 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7120 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7121 return;
7122 }
7123 break;
7124 }
7125 case AArch64ISD::LD2DUPpost: {
7126 if (VT == MVT::v8i8) {
7127 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7128 return;
7129 } else if (VT == MVT::v16i8) {
7130 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7131 return;
7132 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7133 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7134 return;
7135 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7136 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7137 return;
7138 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7139 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7140 return;
7141 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7142 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7143 return;
7144 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7145 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7146 return;
7147 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7148 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7149 return;
7150 }
7151 break;
7152 }
7153 case AArch64ISD::LD3DUPpost: {
7154 if (VT == MVT::v8i8) {
7155 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7156 return;
7157 } else if (VT == MVT::v16i8) {
7158 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7159 return;
7160 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7161 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7162 return;
7163 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7164 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7165 return;
7166 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7167 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7168 return;
7169 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7170 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7171 return;
7172 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7173 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7174 return;
7175 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7176 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7177 return;
7178 }
7179 break;
7180 }
7181 case AArch64ISD::LD4DUPpost: {
7182 if (VT == MVT::v8i8) {
7183 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7184 return;
7185 } else if (VT == MVT::v16i8) {
7186 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7187 return;
7188 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7189 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7190 return;
7191 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7192 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7193 return;
7194 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7195 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7196 return;
7197 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7198 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7199 return;
7200 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7201 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7202 return;
7203 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7204 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7205 return;
7206 }
7207 break;
7208 }
7209 case AArch64ISD::LD1LANEpost: {
7210 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7211 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7212 return;
7213 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7214 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7215 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7216 return;
7217 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7218 VT == MVT::v2f32) {
7219 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7220 return;
7221 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7222 VT == MVT::v1f64) {
7223 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7224 return;
7225 }
7226 break;
7227 }
7228 case AArch64ISD::LD2LANEpost: {
7229 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7230 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7231 return;
7232 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7233 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7234 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7235 return;
7236 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7237 VT == MVT::v2f32) {
7238 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7239 return;
7240 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7241 VT == MVT::v1f64) {
7242 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7243 return;
7244 }
7245 break;
7246 }
7247 case AArch64ISD::LD3LANEpost: {
7248 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7249 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7250 return;
7251 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7252 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7253 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7254 return;
7255 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7256 VT == MVT::v2f32) {
7257 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7258 return;
7259 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7260 VT == MVT::v1f64) {
7261 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7262 return;
7263 }
7264 break;
7265 }
7266 case AArch64ISD::LD4LANEpost: {
7267 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7268 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7269 return;
7270 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7271 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7272 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7273 return;
7274 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7275 VT == MVT::v2f32) {
7276 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7277 return;
7278 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7279 VT == MVT::v1f64) {
7280 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7281 return;
7282 }
7283 break;
7284 }
7285 case AArch64ISD::ST2post: {
7286 VT = Node->getOperand(1).getValueType();
7287 if (VT == MVT::v8i8) {
7288 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7289 return;
7290 } else if (VT == MVT::v16i8) {
7291 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7292 return;
7293 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7294 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7295 return;
7296 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7297 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7298 return;
7299 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7300 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7301 return;
7302 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7303 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7304 return;
7305 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7306 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7307 return;
7308 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7309 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7310 return;
7311 }
7312 break;
7313 }
7314 case AArch64ISD::ST3post: {
7315 VT = Node->getOperand(1).getValueType();
7316 if (VT == MVT::v8i8) {
7317 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7318 return;
7319 } else if (VT == MVT::v16i8) {
7320 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7321 return;
7322 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7323 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7324 return;
7325 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7326 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7327 return;
7328 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7329 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7330 return;
7331 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7332 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7333 return;
7334 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7335 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7336 return;
7337 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7338 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7339 return;
7340 }
7341 break;
7342 }
7343 case AArch64ISD::ST4post: {
7344 VT = Node->getOperand(1).getValueType();
7345 if (VT == MVT::v8i8) {
7346 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7347 return;
7348 } else if (VT == MVT::v16i8) {
7349 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7350 return;
7351 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7352 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7353 return;
7354 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7355 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7356 return;
7357 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7358 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7359 return;
7360 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7361 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7362 return;
7363 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7364 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7365 return;
7366 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7367 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7368 return;
7369 }
7370 break;
7371 }
7372 case AArch64ISD::ST1x2post: {
7373 VT = Node->getOperand(1).getValueType();
7374 if (VT == MVT::v8i8) {
7375 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7376 return;
7377 } else if (VT == MVT::v16i8) {
7378 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7379 return;
7380 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7381 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7382 return;
7383 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7384 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7385 return;
7386 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7387 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7388 return;
7389 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7390 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7391 return;
7392 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7393 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7394 return;
7395 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7396 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7397 return;
7398 }
7399 break;
7400 }
7401 case AArch64ISD::ST1x3post: {
7402 VT = Node->getOperand(1).getValueType();
7403 if (VT == MVT::v8i8) {
7404 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7405 return;
7406 } else if (VT == MVT::v16i8) {
7407 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7408 return;
7409 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7410 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7411 return;
7412 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7413 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7414 return;
7415 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7416 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7417 return;
7418 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7419 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7420 return;
7421 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7422 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7423 return;
7424 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7425 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7426 return;
7427 }
7428 break;
7429 }
7430 case AArch64ISD::ST1x4post: {
7431 VT = Node->getOperand(1).getValueType();
7432 if (VT == MVT::v8i8) {
7433 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7434 return;
7435 } else if (VT == MVT::v16i8) {
7436 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7437 return;
7438 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7439 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7440 return;
7441 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7442 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7443 return;
7444 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7445 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7446 return;
7447 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7448 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7449 return;
7450 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7451 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7452 return;
7453 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7454 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7455 return;
7456 }
7457 break;
7458 }
7459 case AArch64ISD::ST2LANEpost: {
7460 VT = Node->getOperand(1).getValueType();
7461 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7462 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7463 return;
7464 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7465 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7466 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7467 return;
7468 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7469 VT == MVT::v2f32) {
7470 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7471 return;
7472 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7473 VT == MVT::v1f64) {
7474 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7475 return;
7476 }
7477 break;
7478 }
7479 case AArch64ISD::ST3LANEpost: {
7480 VT = Node->getOperand(1).getValueType();
7481 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7482 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7483 return;
7484 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7485 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7486 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7487 return;
7488 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7489 VT == MVT::v2f32) {
7490 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7491 return;
7492 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7493 VT == MVT::v1f64) {
7494 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7495 return;
7496 }
7497 break;
7498 }
7499 case AArch64ISD::ST4LANEpost: {
7500 VT = Node->getOperand(1).getValueType();
7501 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7502 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7503 return;
7504 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7505 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7506 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7507 return;
7508 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7509 VT == MVT::v2f32) {
7510 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7511 return;
7512 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7513 VT == MVT::v1f64) {
7514 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7515 return;
7516 }
7517 break;
7518 }
7519 }
7520
7521 // Select the default instruction
7522 SelectCode(Node);
7523}
7524
7525/// createAArch64ISelDag - This pass converts a legalized DAG into a
7526/// AArch64-specific DAG, ready for instruction scheduling.
7528 CodeGenOptLevel OptLevel) {
7529 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7530}
7531
/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec >1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  // Only the four legal SVE predicate types map onto a packed data type.
  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  // Element width chosen so that EC elements fill one 128-bit SVE block.
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}
7556
7557/// Return the EVT of the data associated to a memory operation in \p
7558/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7560 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7561 return MemIntr->getMemoryVT();
7562
7563 if (isa<MemSDNode>(Root)) {
7564 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7565
7566 EVT DataVT;
7567 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7568 DataVT = Load->getValueType(0);
7569 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7570 DataVT = Load->getValueType(0);
7571 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7572 DataVT = Store->getValue().getValueType();
7573 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7574 DataVT = Store->getValue().getValueType();
7575 else
7576 llvm_unreachable("Unexpected MemSDNode!");
7577
7578 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7579 }
7580
7581 const unsigned Opcode = Root->getOpcode();
7582 // For custom ISD nodes, we have to look at them individually to extract the
7583 // type of the data moved to/from memory.
7584 switch (Opcode) {
7585 case AArch64ISD::LD1_MERGE_ZERO:
7586 case AArch64ISD::LD1S_MERGE_ZERO:
7587 case AArch64ISD::LDNF1_MERGE_ZERO:
7588 case AArch64ISD::LDNF1S_MERGE_ZERO:
7589 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7590 case AArch64ISD::ST1_PRED:
7591 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7592 default:
7593 break;
7594 }
7595
7596 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7597 return EVT();
7598
7599 switch (Root->getConstantOperandVal(1)) {
7600 default:
7601 return EVT();
7602 case Intrinsic::aarch64_sme_ldr:
7603 case Intrinsic::aarch64_sme_str:
7604 return MVT::nxv16i8;
7605 case Intrinsic::aarch64_sve_prf:
7606 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7607 // width of the predicate.
7609 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7610 case Intrinsic::aarch64_sve_ld2_sret:
7611 case Intrinsic::aarch64_sve_ld2q_sret:
7613 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7614 case Intrinsic::aarch64_sve_st2q:
7616 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7617 case Intrinsic::aarch64_sve_ld3_sret:
7618 case Intrinsic::aarch64_sve_ld3q_sret:
7620 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7621 case Intrinsic::aarch64_sve_st3q:
7623 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7624 case Intrinsic::aarch64_sve_ld4_sret:
7625 case Intrinsic::aarch64_sve_ld4q_sret:
7627 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7628 case Intrinsic::aarch64_sve_st4q:
7630 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7631 case Intrinsic::aarch64_sve_ld1udq:
7632 case Intrinsic::aarch64_sve_st1dq:
7633 return EVT(MVT::nxv1i64);
7634 case Intrinsic::aarch64_sve_ld1uwq:
7635 case Intrinsic::aarch64_sve_st1wq:
7636 return EVT(MVT::nxv1i32);
7637 }
7638}
7639
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(FI)) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  int64_t MulImm = std::numeric_limits<int64_t>::max();
  if (VScale.getOpcode() == ISD::VSCALE) {
    MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
  } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
    // A plain constant byte offset can only be folded when the exact vector
    // length is known, so it can be converted into a vscale multiple.
    int64_t ByteOffset = C->getSExtValue();
    const auto KnownVScale =
        Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;

    if (!KnownVScale || ByteOffset % KnownVScale != 0)
      return false;

    MulImm = ByteOffset / KnownVScale;
  } else
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;

  // The immediate is scaled by the memory access width.
  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(FI))
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}
7708
/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  // Only (add base, offset) shapes are candidates for reg+reg addressing.
  if (N.getOpcode() != ISD::ADD)
    return false;

  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately: any register pair matches directly.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    // A constant offset can still use reg+reg addressing provided it is a
    // multiple of the vector element's byte size; materialize the scaled-down
    // immediate into a register.
    int64_t ImmOff = C->getSExtValue();
    if (ImmOff % static_cast<int64_t>(unsigned(1) << Scale))
      return false;

    SDLoc DL(N);
    Base = LHS;
    SDValue Scaled = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Scaled};
    Offset = SDValue(
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops), 0);
    return true;
  }

  // Otherwise look for (add base, (shl idx, Scale)) and use idx directly.
  if (RHS.getOpcode() == ISD::SHL)
    if (auto *C = dyn_cast<ConstantSDNode>(RHS.getOperand(1)))
      if (C->getZExtValue() == Scale) {
        Base = LHS;
        Offset = RHS.getOperand(0);
        return true;
      }

  return false;
}
7761
7762bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7763 const AArch64TargetLowering *TLI =
7764 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7765
7766 return TLI->isAllActivePredicate(*CurDAG, N);
7767}
7768
7769bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7770 EVT VT = N.getValueType();
7771 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7772}
7773
/// Select an SME tile-slice operand of the form Base + Offset, where Offset
/// is an immediate multiple of \p Scale in the range (0, MaxSize].
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Return the scaled offset as a target constant if CN is a suitable
  // immediate, otherwise an empty SDValue.
  auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
    if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
      int64_t ImmOff = C->getSExtValue();
      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
        return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
    }
    return SDValue();
  };

  // A bare immediate slice: base register is zero.
  if (SDValue C = MatchConstantOffset(N)) {
    Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
    Offset = C;
    return true;
  }

  // Try to untangle an ADD node into a 'reg + offset'
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
      Base = N.getOperand(0);
      Offset = C;
      return true;
    }
  }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}
7806
/// Select an unsigned 6-bit immediate operand for a compare-and-branch,
/// conservatively shrinking the acceptable range so the immediate remains
/// valid if the pseudo is later incremented/decremented or its condition
/// reversed.
bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
                                                      SDValue &Imm) {
  AArch64CC::CondCode CC =
      static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    // Check conservatively if the immediate fits the valid range [0, 64).
    // Immediate variants for GE and HS definitely need to be decremented
    // when lowering the pseudos later, so an immediate of 1 would become 0.
    // For the inverse conditions LT and LO we don't know for sure if they
    // will need a decrement but should the decision be made to reverse the
    // branch condition, we again end up with the need to decrement.
    // The same argument holds for LE, LS, GT and HI and possibly
    // incremented immediates. This can lead to slightly less optimal
    // codegen, e.g. we never codegen the legal case
    //   cblt w0, #63, A
    // because we could end up with the illegal case
    //   cbge w0, #64, B
    // should the decision to reverse the branch direction be made. For the
    // lower bound cases this is no problem since we can express comparisons
    // against 0 with either tbz/tnbz or using wzr/xzr.
    uint64_t LowerBound = 0, UpperBound = 64;
    switch (CC) {
    case AArch64CC::GE:
    case AArch64CC::HS:
    case AArch64CC::LT:
    case AArch64CC::LO:
      LowerBound = 1;
      break;
    case AArch64CC::LE:
    case AArch64CC::LS:
    case AArch64CC::GT:
    case AArch64CC::HI:
      UpperBound = 63;
      break;
    default:
      break;
    }

    if (CN->getAPIntValue().uge(LowerBound) &&
        CN->getAPIntValue().ult(UpperBound)) {
      SDLoc DL(N);
      Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
      return true;
    }
  }

  return false;
}
7855
7856template <bool MatchCBB>
7857bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7858 SDValue &ExtType) {
7859
7860 // Use an invalid shift-extend value to indicate we don't need to extend later
7861 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7862 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7863 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7864 return false;
7865 Reg = N.getOperand(0);
7866 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7867 SDLoc(N), MVT::i32);
7868 return true;
7869 }
7870
7872
7873 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7874 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7875 Reg = N.getOperand(0);
7876 ExtType =
7877 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7878 return true;
7879 }
7880
7881 return false;
7882}
7883
7884void AArch64DAGToDAGISel::PreprocessISelDAG() {
7885 bool MadeChange = false;
7886 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
7887 if (N.use_empty())
7888 continue;
7889
7891 switch (N.getOpcode()) {
7892 case ISD::SCALAR_TO_VECTOR: {
7893 EVT ScalarTy = N.getValueType(0).getVectorElementType();
7894 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
7895 ScalarTy == N.getOperand(0).getValueType())
7896 Result = addBitcastHints(*CurDAG, N);
7897
7898 break;
7899 }
7900 default:
7901 break;
7902 }
7903
7904 if (Result) {
7905 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
7906 LLVM_DEBUG(N.dump(CurDAG));
7907 LLVM_DEBUG(dbgs() << "\nNew: ");
7908 LLVM_DEBUG(Result.dump(CurDAG));
7909 LLVM_DEBUG(dbgs() << "\n");
7910
7911 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
7912 MadeChange = true;
7913 }
7914 }
7915
7916 if (MadeChange)
7917 CurDAG->RemoveDeadNodes();
7918
7920}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1467
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:471
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:210
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.