1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element, that element is
347 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516
517 template <bool MatchCBB>
518 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
519};
520
521class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
522public:
523 static char ID;
524 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
525 CodeGenOptLevel OptLevel)
526 : SelectionDAGISelLegacy(
527 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
528};
529} // end anonymous namespace
530
531char AArch64DAGToDAGISelLegacy::ID = 0;
532
533INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
534
535/// isIntImmediate - This method tests to see if the node is a constant
536/// operand. If so Imm will receive the 32-bit value.
537static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
538 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
539 Imm = C->getZExtValue();
540 return true;
541 }
542 return false;
543}
544
545 // isIntImmediate - This method tests to see if N is a constant operand.
546// If so Imm will receive the value.
547static bool isIntImmediate(SDValue N, uint64_t &Imm) {
548 return isIntImmediate(N.getNode(), Imm);
549}
550
551// isOpcWithIntImmediate - This method tests to see if the node is a specific
552 // opcode and that it has an immediate integer right operand.
553 // If so Imm will receive the 32-bit value.
554static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
555 uint64_t &Imm) {
556 return N->getOpcode() == Opc &&
557 isIntImmediate(N->getOperand(1).getNode(), Imm);
558}
559
560// isIntImmediateEq - This method tests to see if N is a constant operand that
561// is equivalent to 'ImmExpected'.
562#ifndef NDEBUG
563static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
564 uint64_t Imm;
565 if (!isIntImmediate(N.getNode(), Imm))
566 return false;
567 return Imm == ImmExpected;
568}
569#endif
570
571bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
572 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
573 std::vector<SDValue> &OutOps) {
574 switch(ConstraintID) {
575 default:
576 llvm_unreachable("Unexpected asm memory constraint");
577 case InlineAsm::ConstraintCode::m:
578 case InlineAsm::ConstraintCode::o:
579 case InlineAsm::ConstraintCode::Q:
580 // We need to make sure that this one operand does not end up in XZR, thus
581 // require the address to be in a PointerRegClass register.
582 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
583 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
584 SDLoc dl(Op);
585 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
586 SDValue NewOp =
587 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
588 dl, Op.getValueType(),
589 Op, RC), 0);
590 OutOps.push_back(NewOp);
591 return false;
592 }
593 return true;
594}
595
596/// SelectArithImmed - Select an immediate value that can be represented as
597/// a 12-bit value shifted left by either 0 or 12. If so, return true with
598/// Val set to the 12-bit value and Shift set to the shifter operand.
599bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
600 SDValue &Shift) {
601 // This function is called from the addsub_shifted_imm ComplexPattern,
602 // which lists [imm] as the list of opcode it's interested in, however
603 // we still need to check whether the operand is actually an immediate
604 // here because the ComplexPattern opcode list is only used in
605 // root-level opcode matching.
606 if (!isa<ConstantSDNode>(N.getNode()))
607 return false;
608
609 uint64_t Immed = N.getNode()->getAsZExtVal();
610 unsigned ShiftAmt;
611
612 if (Immed >> 12 == 0) {
613 ShiftAmt = 0;
614 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
615 ShiftAmt = 12;
616 Immed = Immed >> 12;
617 } else
618 return false;
619
620 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
621 SDLoc dl(N);
622 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
623 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
624 return true;
625}
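// Illustrative sketch (not part of the original file): the arith-immediate rule
// above, restated as a standalone checker. The helper name isArithImmed12 is
// made up for illustration; only 12-bit values, optionally shifted left by 12,
// are accepted (e.g. 0xabc and 0xabc000 match, 0xabc001 and 0x1000000 do not).
static inline bool isArithImmed12(uint64_t Immed, unsigned &ShiftAmt) {
  if (Immed >> 12 == 0) {                         // fits in bits [11:0], LSL #0
    ShiftAmt = 0;
    return true;
  }
  if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { // 12 bits shifted left by 12
    ShiftAmt = 12;
    return true;
  }
  return false;                                   // needs a MOV + register add
}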
626
627/// SelectNegArithImmed - As above, but negates the value before trying to
628/// select it.
629bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
630 SDValue &Shift) {
631 // This function is called from the addsub_shifted_imm ComplexPattern,
632 // which lists [imm] as the list of opcode it's interested in, however
633 // we still need to check whether the operand is actually an immediate
634 // here because the ComplexPattern opcode list is only used in
635 // root-level opcode matching.
636 if (!isa<ConstantSDNode>(N.getNode()))
637 return false;
638
639 // The immediate operand must be a 24-bit zero-extended immediate.
640 uint64_t Immed = N.getNode()->getAsZExtVal();
641
642 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
643 // have the opposite effect on the C flag, so this pattern mustn't match under
644 // those circumstances.
645 if (Immed == 0)
646 return false;
647
648 if (N.getValueType() == MVT::i32)
649 Immed = ~((uint32_t)Immed) + 1;
650 else
651 Immed = ~Immed + 1ULL;
652 if (Immed & 0xFFFFFFFFFF000000ULL)
653 return false;
654
655 Immed &= 0xFFFFFFULL;
656 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
657 Shift);
658}
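// Worked example (not part of the original file) of the negation above: for an
// i32 operand of -4096 (0xfffff000 when zero-extended), the negated value is
// 0x1000, which SelectArithImmed encodes as Val = 1, Shift = LSL #12, so a
// "cmp w0, #-4096" can be selected as "cmn w0, #4096".
static_assert(~uint32_t(0xfffff000) + 1u == 0x1000u,
              "negated immediate used in the example above");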
659
660/// getShiftTypeForNode - Translate a shift node to the corresponding
661/// ShiftType value.
662 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
663 switch (N.getOpcode()) {
664 default:
665 return AArch64_AM::InvalidShiftExtend;
666 case ISD::SHL:
667 return AArch64_AM::LSL;
668 case ISD::SRL:
669 return AArch64_AM::LSR;
670 case ISD::SRA:
671 return AArch64_AM::ASR;
672 case ISD::ROTR:
673 return AArch64_AM::ROR;
674 }
675}
676
677 static bool isMemOpOrPrefetch(SDNode *N) {
678 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
679}
680
681/// Determine whether it is worth it to fold SHL into the addressing
682/// mode.
683 static bool isWorthFoldingSHL(SDValue V) {
684 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
685 // It is worth folding logical shift of up to three places.
686 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
687 if (!CSD)
688 return false;
689 unsigned ShiftVal = CSD->getZExtValue();
690 if (ShiftVal > 3)
691 return false;
692
693 // Check if this particular node is reused in any non-memory related
694 // operation. If yes, do not try to fold this node into the address
695 // computation, since the computation will be kept.
696 const SDNode *Node = V.getNode();
697 for (SDNode *UI : Node->users())
698 if (!isMemOpOrPrefetch(UI))
699 for (SDNode *UII : UI->users())
700 if (!isMemOpOrPrefetch(UII))
701 return false;
702 return true;
703}
704
705 /// Determine whether it is worth folding V into an extended register addressing
706/// mode.
707bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
708 // Trivial if we are optimizing for code size or if there is only
709 // one use of the value.
710 if (CurDAG->shouldOptForSize() || V.hasOneUse())
711 return true;
712
713 // If a subtarget has a slow shift, folding a shift into multiple loads
714 // costs additional micro-ops.
715 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
716 return false;
717
718 // Check whether we're going to emit the address arithmetic anyway because
719 // it's used by a non-address operation.
720 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
721 return true;
722 if (V.getOpcode() == ISD::ADD) {
723 const SDValue LHS = V.getOperand(0);
724 const SDValue RHS = V.getOperand(1);
725 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
726 return true;
727 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
728 return true;
729 }
730
731 // It hurts otherwise, since the value will be reused.
732 return false;
733}
734
735/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
736/// to select more shifted register
737bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
738 SDValue &Shift) {
739 EVT VT = N.getValueType();
740 if (VT != MVT::i32 && VT != MVT::i64)
741 return false;
742
743 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
744 return false;
745 SDValue LHS = N.getOperand(0);
746 if (!LHS->hasOneUse())
747 return false;
748
749 unsigned LHSOpcode = LHS->getOpcode();
750 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
751 return false;
752
753 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
754 if (!ShiftAmtNode)
755 return false;
756
757 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
758 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
759 if (!RHSC)
760 return false;
761
762 APInt AndMask = RHSC->getAPIntValue();
763 unsigned LowZBits, MaskLen;
764 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
765 return false;
766
767 unsigned BitWidth = N.getValueSizeInBits();
768 SDLoc DL(LHS);
769 uint64_t NewShiftC;
770 unsigned NewShiftOp;
771 if (LHSOpcode == ISD::SHL) {
772 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
773 // BitWidth != LowZBits + MaskLen doesn't match the pattern
774 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
775 return false;
776
777 NewShiftC = LowZBits - ShiftAmtC;
778 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
779 } else {
780 if (LowZBits == 0)
781 return false;
782
783 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
784 NewShiftC = LowZBits + ShiftAmtC;
785 if (NewShiftC >= BitWidth)
786 return false;
787
788 // SRA need all high bits
789 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
790 return false;
791
792 // SRL high bits can be 0 or 1
793 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
794 return false;
795
796 if (LHSOpcode == ISD::SRL)
797 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
798 else
799 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
800 }
801
802 assert(NewShiftC < BitWidth && "Invalid shift amount");
803 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
804 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
805 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
806 NewShiftAmt, BitWidthMinus1),
807 0);
808 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
809 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
810 return true;
811}
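// Worked 32-bit example (not part of the original file): for
//   (and (shl x, 2), 0xffffff00)
// the mask gives LowZBits = 8 and MaskLen = 24, so BitWidth == LowZBits +
// MaskLen and LowZBits > ShiftAmtC. The code above therefore emits
//   Reg   = UBFMWri x, 6, 31        (i.e. lsr wtmp, x, #6)
//   Shift = LSL #8
// and the user instruction consumes "wtmp, lsl #8", which is equivalent:
static_assert(((0x12345678u << 2) & 0xffffff00u) == ((0x12345678u >> 6) << 8),
              "shifted-register rewrite illustrated above");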
812
813/// getExtendTypeForNode - Translate an extend node to the corresponding
814/// ExtendType value.
815 static AArch64_AM::ShiftExtendType
816 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
817 if (N.getOpcode() == ISD::SIGN_EXTEND ||
818 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
819 EVT SrcVT;
820 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
821 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
822 else
823 SrcVT = N.getOperand(0).getValueType();
824
825 if (!IsLoadStore && SrcVT == MVT::i8)
826 return AArch64_AM::SXTB;
827 else if (!IsLoadStore && SrcVT == MVT::i16)
828 return AArch64_AM::SXTH;
829 else if (SrcVT == MVT::i32)
830 return AArch64_AM::SXTW;
831 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
832
833 return AArch64_AM::InvalidShiftExtend;
834 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
835 N.getOpcode() == ISD::ANY_EXTEND) {
836 EVT SrcVT = N.getOperand(0).getValueType();
837 if (!IsLoadStore && SrcVT == MVT::i8)
838 return AArch64_AM::UXTB;
839 else if (!IsLoadStore && SrcVT == MVT::i16)
840 return AArch64_AM::UXTH;
841 else if (SrcVT == MVT::i32)
842 return AArch64_AM::UXTW;
843 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
844
845 return AArch64_AM::InvalidShiftExtend;
846 } else if (N.getOpcode() == ISD::AND) {
847 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
848 if (!CSD)
849 return AArch64_AM::InvalidShiftExtend;
850 uint64_t AndMask = CSD->getZExtValue();
851
852 switch (AndMask) {
853 default:
854 return AArch64_AM::InvalidShiftExtend;
855 case 0xFF:
856 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
857 case 0xFFFF:
858 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
859 case 0xFFFFFFFF:
860 return AArch64_AM::UXTW;
861 }
862 }
863
864 return AArch64_AM::InvalidShiftExtend;
865 }
866
867 /// Determine whether it is worth folding V into an extended register of an
868 /// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
869 /// instruction, and the shift should be treated as worth folding even if it
870 /// has multiple uses.
871bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
872 // Trivial if we are optimizing for code size or if there is only
873 // one use of the value.
874 if (CurDAG->shouldOptForSize() || V.hasOneUse())
875 return true;
876
877 // If a subtarget has a fastpath LSL we can fold a logical shift into
878 // the add/sub and save a cycle.
879 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
880 V.getConstantOperandVal(1) <= 4 &&
881 !V.getOperand(0).isMachineOpcode())
882 return true;
883
884 // It hurts otherwise, since the value will be reused.
885 return false;
886}
887
888/// SelectShiftedRegister - Select a "shifted register" operand. If the value
889/// is not shifted, set the Shift operand to default of "LSL 0". The logical
890/// instructions allow the shifted register to be rotated, but the arithmetic
891/// instructions do not. The AllowROR parameter specifies whether ROR is
892/// supported.
893bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
894 SDValue &Reg, SDValue &Shift) {
895 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
896 return true;
897
898 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
899 if (ShType == AArch64_AM::InvalidShiftExtend)
900 return false;
901 if (!AllowROR && ShType == AArch64_AM::ROR)
902 return false;
903
904 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
905 unsigned BitSize = N.getValueSizeInBits();
906 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
907 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
908
909 Reg = N.getOperand(0);
910 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
911 return isWorthFoldingALU(N, true);
912 }
913
914 return false;
915}
916
917/// Instructions that accept extend modifiers like UXTW expect the register
918/// being extended to be a GPR32, but the incoming DAG might be acting on a
919/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
920/// this is the case.
921 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
922 if (N.getValueType() == MVT::i32)
923 return N;
924
925 SDLoc dl(N);
926 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
927}
928
929// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
930template<signed Low, signed High, signed Scale>
931bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
932 if (!isa<ConstantSDNode>(N))
933 return false;
934
935 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
936 if ((MulImm % std::abs(Scale)) == 0) {
937 int64_t RDVLImm = MulImm / Scale;
938 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
939 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
940 return true;
941 }
942 }
943
944 return false;
945}
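// Example (not part of the original file), assuming the common instantiation
// Low = -32, High = 31, Scale = 16 (a signed 6-bit RDVL immediate counting
// 16-byte granules): a request for VSCALE * 32 has MulImm = 32, which divides
// evenly to give "rdvl x0, #2"; VSCALE * 24 is rejected (not a multiple of 16)
// and VSCALE * 1024 is rejected (multiplier 64 is outside [-32, 31]).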
946
947// Returns a suitable RDSVL multiplier from a left shift.
948template <signed Low, signed High>
949bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
950 if (!isa<ConstantSDNode>(N))
951 return false;
952
953 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
954 if (MulImm >= Low && MulImm <= High) {
955 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
956 return true;
957 }
958
959 return false;
960}
961
962 /// SelectArithExtendedRegister - Select an "extended register" operand. This
963/// operand folds in an extend followed by an optional left shift.
964bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
965 SDValue &Shift) {
966 unsigned ShiftVal = 0;
967 AArch64_AM::ShiftExtendType Ext;
968
969 if (N.getOpcode() == ISD::SHL) {
970 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
971 if (!CSD)
972 return false;
973 ShiftVal = CSD->getZExtValue();
974 if (ShiftVal > 4)
975 return false;
976
977 Ext = getExtendTypeForNode(N.getOperand(0));
978 if (Ext == AArch64_AM::InvalidShiftExtend)
979 return false;
980
981 Reg = N.getOperand(0).getOperand(0);
982 } else {
983 Ext = getExtendTypeForNode(N);
984 if (Ext == AArch64_AM::InvalidShiftExtend)
985 return false;
986
987 Reg = N.getOperand(0);
988
989 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
990 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
991 auto isDef32 = [](SDValue N) {
992 unsigned Opc = N.getOpcode();
993 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
994 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
995 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
996 Opc != ISD::FREEZE;
997 };
998 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
999 isDef32(Reg))
1000 return false;
1001 }
1002
1003 // AArch64 mandates that the RHS of the operation must use the smallest
1004 // register class that could contain the size being extended from. Thus,
1005 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1006 // there might not be an actual 32-bit value in the program. We can
1007 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1008 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1009 Reg = narrowIfNeeded(CurDAG, Reg);
1010 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1011 MVT::i32);
1012 return isWorthFoldingALU(N);
1013}
1014
1015/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1016 /// operand is used by instructions that have an SP operand.
1017bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1018 SDValue &Shift) {
1019 unsigned ShiftVal = 0;
1020 AArch64_AM::ShiftExtendType Ext;
1021
1022 if (N.getOpcode() != ISD::SHL)
1023 return false;
1024
1025 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1026 if (!CSD)
1027 return false;
1028 ShiftVal = CSD->getZExtValue();
1029 if (ShiftVal > 4)
1030 return false;
1031
1032 Ext = AArch64_AM::UXTX;
1033 Reg = N.getOperand(0);
1034 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1035 MVT::i32);
1036 return isWorthFoldingALU(N);
1037}
1038
1039/// If there's a use of this ADDlow that's not itself a load/store then we'll
1040/// need to create a real ADD instruction from it anyway and there's no point in
1041/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1042/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1043/// leads to duplicated ADRP instructions.
1044 static bool isWorthFoldingADDlow(SDValue N) {
1045 for (auto *User : N->users()) {
1046 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1047 User->getOpcode() != ISD::ATOMIC_LOAD &&
1048 User->getOpcode() != ISD::ATOMIC_STORE)
1049 return false;
1050
1051 // ldar and stlr have much more restrictive addressing modes (just a
1052 // register).
1053 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1054 return false;
1055 }
1056
1057 return true;
1058}
1059
1060/// Check if the immediate offset is valid as a scaled immediate.
1061static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1062 unsigned Size) {
1063 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1064 Offset < (Range << Log2_32(Size)))
1065 return true;
1066 return false;
1067}
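// Examples (not part of the original file) for an 8-byte access with the
// unsigned 12-bit range used by the callers below (Range = 0x1000, Size = 8):
//   isValidAsScaledImmediate(0,     0x1000, 8) -> true
//   isValidAsScaledImmediate(32760, 0x1000, 8) -> true   (0xfff * 8)
//   isValidAsScaledImmediate(32768, 0x1000, 8) -> false  (out of range)
//   isValidAsScaledImmediate(12,    0x1000, 8) -> false  (not 8-byte aligned)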
1068
1069/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1070/// immediate" address. The "Size" argument is the size in bytes of the memory
1071/// reference, which determines the scale.
1072bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1073 unsigned BW, unsigned Size,
1074 SDValue &Base,
1075 SDValue &OffImm) {
1076 SDLoc dl(N);
1077 const DataLayout &DL = CurDAG->getDataLayout();
1078 const TargetLowering *TLI = getTargetLowering();
1079 if (N.getOpcode() == ISD::FrameIndex) {
1080 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1081 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1082 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1083 return true;
1084 }
1085
1086 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1087 // offset selected here doesn't support labels/immediates, only base+offset.
1088 if (CurDAG->isBaseWithConstantOffset(N)) {
1089 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1090 if (IsSignedImm) {
1091 int64_t RHSC = RHS->getSExtValue();
1092 unsigned Scale = Log2_32(Size);
1093 int64_t Range = 0x1LL << (BW - 1);
1094
1095 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1096 RHSC < (Range << Scale)) {
1097 Base = N.getOperand(0);
1098 if (Base.getOpcode() == ISD::FrameIndex) {
1099 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1100 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1101 }
1102 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1103 return true;
1104 }
1105 } else {
1106 // unsigned Immediate
1107 uint64_t RHSC = RHS->getZExtValue();
1108 unsigned Scale = Log2_32(Size);
1109 uint64_t Range = 0x1ULL << BW;
1110
1111 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1112 Base = N.getOperand(0);
1113 if (Base.getOpcode() == ISD::FrameIndex) {
1114 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1115 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1116 }
1117 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1118 return true;
1119 }
1120 }
1121 }
1122 }
1123 // Base only. The address will be materialized into a register before
1124 // the memory is accessed.
1125 // add x0, Xbase, #offset
1126 // stp x1, x2, [x0]
1127 Base = N;
1128 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1129 return true;
1130}
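// Worked example (not part of the original file): through the
// SelectAddrModeIndexed7S64 wrapper above (IsSignedImm = true, BW = 7,
// Size = 8), Range = 1 << 6 = 64, so valid offsets are multiples of 8 in
// [-512, 504] and OffImm is the byte offset divided by 8 (e.g. -512 -> -64);
// offsets such as -520 or +512 fall back to the base-only path.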
1131
1132/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1133/// immediate" address. The "Size" argument is the size in bytes of the memory
1134/// reference, which determines the scale.
1135bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1136 SDValue &Base, SDValue &OffImm) {
1137 SDLoc dl(N);
1138 const DataLayout &DL = CurDAG->getDataLayout();
1139 const TargetLowering *TLI = getTargetLowering();
1140 if (N.getOpcode() == ISD::FrameIndex) {
1141 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1142 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1143 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1144 return true;
1145 }
1146
1147 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1148 GlobalAddressSDNode *GAN =
1149 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1150 Base = N.getOperand(0);
1151 OffImm = N.getOperand(1);
1152 if (!GAN)
1153 return true;
1154
1155 if (GAN->getOffset() % Size == 0 &&
1156 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1157 return true;
1158 }
1159
1160 if (CurDAG->isBaseWithConstantOffset(N)) {
1161 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1162 int64_t RHSC = (int64_t)RHS->getZExtValue();
1163 unsigned Scale = Log2_32(Size);
1164 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1169 }
1170 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1171 return true;
1172 }
1173 }
1174 }
1175
1176 // Before falling back to our general case, check if the unscaled
1177 // instructions can handle this. If so, that's preferable.
1178 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1179 return false;
1180
1181 // Base only. The address will be materialized into a register before
1182 // the memory is accessed.
1183 // add x0, Xbase, #offset
1184 // ldr x0, [x0]
1185 Base = N;
1186 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1187 return true;
1188}
1189
1190/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1191/// immediate" address. This should only match when there is an offset that
1192/// is not valid for a scaled immediate addressing mode. The "Size" argument
1193/// is the size in bytes of the memory reference, which is needed here to know
1194/// what is valid for a scaled immediate.
1195bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1196 SDValue &Base,
1197 SDValue &OffImm) {
1198 if (!CurDAG->isBaseWithConstantOffset(N))
1199 return false;
1200 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1201 int64_t RHSC = RHS->getSExtValue();
1202 if (RHSC >= -256 && RHSC < 256) {
1203 Base = N.getOperand(0);
1204 if (Base.getOpcode() == ISD::FrameIndex) {
1205 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1206 const TargetLowering *TLI = getTargetLowering();
1207 Base = CurDAG->getTargetFrameIndex(
1208 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1209 }
1210 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1211 return true;
1212 }
1213 }
1214 return false;
1215}
1216
1217 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1218 SDLoc dl(N);
1219 SDValue ImpDef = SDValue(
1220 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1221 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1222 N);
1223}
1224
1225 /// Check if the given SHL node (\p N) can be used to form an
1226/// extended register for an addressing mode.
1227bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1228 bool WantExtend, SDValue &Offset,
1229 SDValue &SignExtend) {
1230 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1231 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1232 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1233 return false;
1234
1235 SDLoc dl(N);
1236 if (WantExtend) {
1237 AArch64_AM::ShiftExtendType Ext =
1238 getExtendTypeForNode(N.getOperand(0), true);
1239 if (Ext == AArch64_AM::InvalidShiftExtend)
1240 return false;
1241
1242 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1243 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1244 MVT::i32);
1245 } else {
1246 Offset = N.getOperand(0);
1247 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1248 }
1249
1250 unsigned LegalShiftVal = Log2_32(Size);
1251 unsigned ShiftVal = CSD->getZExtValue();
1252
1253 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1254 return false;
1255
1256 return isWorthFoldingAddr(N, Size);
1257}
1258
1259bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1260 SDValue &Base, SDValue &Offset,
1261 SDValue &SignExtend,
1262 SDValue &DoShift) {
1263 if (N.getOpcode() != ISD::ADD)
1264 return false;
1265 SDValue LHS = N.getOperand(0);
1266 SDValue RHS = N.getOperand(1);
1267 SDLoc dl(N);
1268
1269 // We don't want to match immediate adds here, because they are better lowered
1270 // to the register-immediate addressing modes.
1271 if (isa<ConstantSDNode>(RHS))
1272 return false;
1273
1274 // Check if this particular node is reused in any non-memory related
1275 // operation. If yes, do not try to fold this node into the address
1276 // computation, since the computation will be kept.
1277 const SDNode *Node = N.getNode();
1278 for (SDNode *UI : Node->users()) {
1279 if (!isMemOpOrPrefetch(UI))
1280 return false;
1281 }
1282
1283 // Remember if it is worth folding N when it produces extended register.
1284 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1285
1286 // Try to match a shifted extend on the RHS.
1287 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1288 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1289 Base = LHS;
1290 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1291 return true;
1292 }
1293
1294 // Try to match a shifted extend on the LHS.
1295 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1296 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1297 Base = RHS;
1298 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1299 return true;
1300 }
1301
1302 // There was no shift, whatever else we find.
1303 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1304
1305 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1306 // Try to match an unshifted extend on the LHS.
1307 if (IsExtendedRegisterWorthFolding &&
1308 (Ext = getExtendTypeForNode(LHS, true)) !=
1309 AArch64_AM::InvalidShiftExtend) {
1310 Base = RHS;
1311 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1312 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1313 MVT::i32);
1314 if (isWorthFoldingAddr(LHS, Size))
1315 return true;
1316 }
1317
1318 // Try to match an unshifted extend on the RHS.
1319 if (IsExtendedRegisterWorthFolding &&
1320 (Ext = getExtendTypeForNode(RHS, true)) !=
1321 AArch64_AM::InvalidShiftExtend) {
1322 Base = LHS;
1323 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1324 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1325 MVT::i32);
1326 if (isWorthFoldingAddr(RHS, Size))
1327 return true;
1328 }
1329
1330 return false;
1331}
1332
1333// Check if the given immediate is preferred by ADD. If an immediate can be
1334// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1335// encoded by one MOVZ, return true.
1336static bool isPreferredADD(int64_t ImmOff) {
1337 // Constant in [0x0, 0xfff] can be encoded in ADD.
1338 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1339 return true;
1340 // Check if it can be encoded in an "ADD LSL #12".
1341 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1342 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1343 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1344 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1345 return false;
1346}
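// Examples (not part of the original file):
//   isPreferredADD(0x123)    -> true   (plain 12-bit ADD immediate)
//   isPreferredADD(0xabc000) -> true   ("add ..., lsl #12"; no single MOVZ)
//   isPreferredADD(0x7000)   -> false  (a single "movz #0x7000" is cheaper)
//   isPreferredADD(0x123456) -> false  (fits neither ADD form)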
1347
1348bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1349 SDValue &Base, SDValue &Offset,
1350 SDValue &SignExtend,
1351 SDValue &DoShift) {
1352 if (N.getOpcode() != ISD::ADD)
1353 return false;
1354 SDValue LHS = N.getOperand(0);
1355 SDValue RHS = N.getOperand(1);
1356 SDLoc DL(N);
1357
1358 // Check if this particular node is reused in any non-memory related
1359 // operation. If yes, do not try to fold this node into the address
1360 // computation, since the computation will be kept.
1361 const SDNode *Node = N.getNode();
1362 for (SDNode *UI : Node->users()) {
1363 if (!isMemOpOrPrefetch(UI))
1364 return false;
1365 }
1366
1367 // Watch out: if RHS is a wide immediate, it cannot be selected into the
1368 // [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
1369 // either. Instead it will use the [BaseReg + 0] address mode and generate
1370 // instructions like:
1371 // MOV X0, WideImmediate
1372 // ADD X1, BaseReg, X0
1373 // LDR X2, [X1, 0]
1374 // For such situation, using [BaseReg, XReg] addressing mode can save one
1375 // ADD/SUB:
1376 // MOV X0, WideImmediate
1377 // LDR X2, [BaseReg, X0]
1378 if (isa<ConstantSDNode>(RHS)) {
1379 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1380 // Skip if the immediate can be selected by a load/store addressing mode.
1381 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1382 // checked by using -ImmOff).
1383 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1384 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1385 return false;
1386
1387 SDValue Ops[] = { RHS };
1388 SDNode *MOVI =
1389 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1390 SDValue MOVIV = SDValue(MOVI, 0);
1391 // This ADD of two X register will be selected into [Reg+Reg] mode.
1392 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1393 }
1394
1395 // Remember if it is worth folding N when it produces extended register.
1396 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1397
1398 // Try to match a shifted extend on the RHS.
1399 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1400 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1401 Base = LHS;
1402 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1403 return true;
1404 }
1405
1406 // Try to match a shifted extend on the LHS.
1407 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1408 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1409 Base = RHS;
1410 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1411 return true;
1412 }
1413
1414 // Match any non-shifted, non-extend, non-immediate add expression.
1415 Base = LHS;
1416 Offset = RHS;
1417 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1418 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1419 // Reg1 + Reg2 is free: no check needed.
1420 return true;
1421}
1422
1423SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1424 static const unsigned RegClassIDs[] = {
1425 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1426 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1427 AArch64::dsub2, AArch64::dsub3};
1428
1429 return createTuple(Regs, RegClassIDs, SubRegs);
1430}
1431
1432SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1433 static const unsigned RegClassIDs[] = {
1434 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1435 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1436 AArch64::qsub2, AArch64::qsub3};
1437
1438 return createTuple(Regs, RegClassIDs, SubRegs);
1439}
1440
1441SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1442 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1443 AArch64::ZPR3RegClassID,
1444 AArch64::ZPR4RegClassID};
1445 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1446 AArch64::zsub2, AArch64::zsub3};
1447
1448 return createTuple(Regs, RegClassIDs, SubRegs);
1449}
1450
1451SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1452 assert(Regs.size() == 2 || Regs.size() == 4);
1453
1454 // The createTuple interface requires 3 RegClassIDs for each possible
1455 // tuple type even though we only have them for ZPR2 and ZPR4.
1456 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1457 AArch64::ZPR4Mul4RegClassID};
1458 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1459 AArch64::zsub2, AArch64::zsub3};
1460 return createTuple(Regs, RegClassIDs, SubRegs);
1461}
1462
1463SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1464 const unsigned RegClassIDs[],
1465 const unsigned SubRegs[]) {
1466 // There's no special register-class for a vector-list of 1 element: it's just
1467 // a vector.
1468 if (Regs.size() == 1)
1469 return Regs[0];
1470
1471 assert(Regs.size() >= 2 && Regs.size() <= 4);
1472
1473 SDLoc DL(Regs[0]);
1474
1475 SmallVector<SDValue, 4> Ops;
1476
1477 // First operand of REG_SEQUENCE is the desired RegClass.
1478 Ops.push_back(
1479 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1480
1481 // Then we get pairs of source & subregister-position for the components.
1482 for (unsigned i = 0; i < Regs.size(); ++i) {
1483 Ops.push_back(Regs[i]);
1484 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1485 }
1486
1487 SDNode *N =
1488 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1489 return SDValue(N, 0);
1490}
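// Illustrative sketch (not part of the original file): createQTuple({V0, V1})
// built through the helper above yields, in pseudo-MIR,
//   %t:qq = REG_SEQUENCE <QQRegClassID>, %V0, %subreg.qsub0, %V1, %subreg.qsub1
// i.e. operand 0 picks the register class for Regs.size() == 2 and each vector
// is followed by its sub-register index.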
1491
1492void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1493 bool isExt) {
1494 SDLoc dl(N);
1495 EVT VT = N->getValueType(0);
1496
1497 unsigned ExtOff = isExt;
1498
1499 // Form a REG_SEQUENCE to force register allocation.
1500 unsigned Vec0Off = ExtOff + 1;
1501 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1502 SDValue RegSeq = createQTuple(Regs);
1503
1505 if (isExt)
1506 Ops.push_back(N->getOperand(1));
1507 Ops.push_back(RegSeq);
1508 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1509 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1510}
1511
1512static std::tuple<SDValue, SDValue>
1513 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1514 SDLoc DL(Disc);
1515 SDValue AddrDisc;
1516 SDValue ConstDisc;
1517
1518 // If this is a blend, remember the constant and address discriminators.
1519 // Otherwise, it's either a constant discriminator, or a non-blended
1520 // address discriminator.
1521 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1522 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1523 AddrDisc = Disc->getOperand(1);
1524 ConstDisc = Disc->getOperand(2);
1525 } else {
1526 ConstDisc = Disc;
1527 }
1528
1529 // If the constant discriminator (either the blend RHS, or the entire
1530 // discriminator value) isn't a 16-bit constant, bail out, and let the
1531 // discriminator be computed separately.
1532 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1533 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1534 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1535
1536 // If there's no address discriminator, use XZR directly.
1537 if (!AddrDisc)
1538 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1539
1540 return std::make_tuple(
1541 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1542 AddrDisc);
1543}
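// Examples (not part of the original file) of the discriminator split above:
//   Disc = @llvm.ptrauth.blend(%addr, 1234)  ->  (TargetConstant 1234, %addr)
//   Disc = constant 1234                     ->  (TargetConstant 1234, XZR)
//   Disc = some non-constant value           ->  (TargetConstant 0,    Disc)
// A blend whose integer half does not fit in 16 bits also takes the last form,
// so the discriminator is computed separately.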
1544
1545void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1546 SDLoc DL(N);
1547 // IntrinsicID is operand #0
1548 SDValue Val = N->getOperand(1);
1549 SDValue AUTKey = N->getOperand(2);
1550 SDValue AUTDisc = N->getOperand(3);
1551
1552 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1553 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1554
1555 SDValue AUTAddrDisc, AUTConstDisc;
1556 std::tie(AUTConstDisc, AUTAddrDisc) =
1557 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1558
1559 if (!Subtarget->isX16X17Safer()) {
1560 SDValue Ops[] = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1561
1562 SDNode *AUT =
1563 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1564 ReplaceNode(N, AUT);
1565 } else {
1566 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1567 AArch64::X16, Val, SDValue());
1568 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1569
1570 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1571 ReplaceNode(N, AUT);
1572 }
1573}
1574
1575void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1576 SDLoc DL(N);
1577 // IntrinsicID is operand #0
1578 SDValue Val = N->getOperand(1);
1579 SDValue AUTKey = N->getOperand(2);
1580 SDValue AUTDisc = N->getOperand(3);
1581 SDValue PACKey = N->getOperand(4);
1582 SDValue PACDisc = N->getOperand(5);
1583
1584 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1585 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1586
1587 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1588 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1589
1590 SDValue AUTAddrDisc, AUTConstDisc;
1591 std::tie(AUTConstDisc, AUTAddrDisc) =
1592 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1593
1594 SDValue PACAddrDisc, PACConstDisc;
1595 std::tie(PACConstDisc, PACAddrDisc) =
1596 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1597
1598 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1599 AArch64::X16, Val, SDValue());
1600
1601 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1602 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1603
1604 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1605 ReplaceNode(N, AUTPAC);
1606}
1607
1608bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1609 LoadSDNode *LD = cast<LoadSDNode>(N);
1610 if (LD->isUnindexed())
1611 return false;
1612 EVT VT = LD->getMemoryVT();
1613 EVT DstVT = N->getValueType(0);
1614 ISD::MemIndexedMode AM = LD->getAddressingMode();
1615 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1616 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1617 int OffsetVal = (int)OffsetOp->getZExtValue();
1618
1619 // We're not doing validity checking here. That was done when checking
1620 // if we should mark the load as indexed or not. We're just selecting
1621 // the right instruction.
1622 unsigned Opcode = 0;
1623
1624 ISD::LoadExtType ExtType = LD->getExtensionType();
1625 bool InsertTo64 = false;
1626 if (VT == MVT::i64)
1627 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1628 else if (VT == MVT::i32) {
1629 if (ExtType == ISD::NON_EXTLOAD)
1630 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1631 else if (ExtType == ISD::SEXTLOAD)
1632 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1633 else {
1634 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1635 InsertTo64 = true;
1636 // The result of the load is only i32. It's the subreg_to_reg that makes
1637 // it into an i64.
1638 DstVT = MVT::i32;
1639 }
1640 } else if (VT == MVT::i16) {
1641 if (ExtType == ISD::SEXTLOAD) {
1642 if (DstVT == MVT::i64)
1643 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1644 else
1645 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1646 } else {
1647 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1648 InsertTo64 = DstVT == MVT::i64;
1649 // The result of the load is only i32. It's the subreg_to_reg that makes
1650 // it into an i64.
1651 DstVT = MVT::i32;
1652 }
1653 } else if (VT == MVT::i8) {
1654 if (ExtType == ISD::SEXTLOAD) {
1655 if (DstVT == MVT::i64)
1656 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1657 else
1658 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1659 } else {
1660 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1661 InsertTo64 = DstVT == MVT::i64;
1662 // The result of the load is only i32. It's the subreg_to_reg that makes
1663 // it into an i64.
1664 DstVT = MVT::i32;
1665 }
1666 } else if (VT == MVT::f16) {
1667 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1668 } else if (VT == MVT::bf16) {
1669 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1670 } else if (VT == MVT::f32) {
1671 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1672 } else if (VT == MVT::f64 ||
1673 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1674 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1675 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1676 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1677 } else if (VT.is64BitVector()) {
1678 if (IsPre || OffsetVal != 8)
1679 return false;
1680 switch (VT.getScalarSizeInBits()) {
1681 case 8:
1682 Opcode = AArch64::LD1Onev8b_POST;
1683 break;
1684 case 16:
1685 Opcode = AArch64::LD1Onev4h_POST;
1686 break;
1687 case 32:
1688 Opcode = AArch64::LD1Onev2s_POST;
1689 break;
1690 case 64:
1691 Opcode = AArch64::LD1Onev1d_POST;
1692 break;
1693 default:
1694 llvm_unreachable("Expected vector element to be a power of 2");
1695 }
1696 } else if (VT.is128BitVector()) {
1697 if (IsPre || OffsetVal != 16)
1698 return false;
1699 switch (VT.getScalarSizeInBits()) {
1700 case 8:
1701 Opcode = AArch64::LD1Onev16b_POST;
1702 break;
1703 case 16:
1704 Opcode = AArch64::LD1Onev8h_POST;
1705 break;
1706 case 32:
1707 Opcode = AArch64::LD1Onev4s_POST;
1708 break;
1709 case 64:
1710 Opcode = AArch64::LD1Onev2d_POST;
1711 break;
1712 default:
1713 llvm_unreachable("Expected vector element to be a power of 2");
1714 }
1715 } else
1716 return false;
1717 SDValue Chain = LD->getChain();
1718 SDValue Base = LD->getBasePtr();
1719 SDLoc dl(N);
1720 // LD1 encodes an immediate offset by using XZR as the offset register.
1721 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1722 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1723 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1724 SDValue Ops[] = { Base, Offset, Chain };
1725 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1726 MVT::Other, Ops);
1727
1728 // Transfer memoperands.
1729 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1730 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1731
1732 // Either way, we're replacing the node, so tell the caller that.
1733 SDValue LoadedVal = SDValue(Res, 1);
1734 if (InsertTo64) {
1735 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1736 LoadedVal =
1737 SDValue(CurDAG->getMachineNode(
1738 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1739 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1740 SubReg),
1741 0);
1742 }
1743
1744 ReplaceUses(SDValue(N, 0), LoadedVal);
1745 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1746 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1747 CurDAG->RemoveDeadNode(N);
1748 return true;
1749}
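// Illustrative cases covered above: a post-indexed zero-extending i8 load into
// an i64 register selects LDRBBpost (an i32 result) wrapped in SUBREG_TO_REG,
// while on a big-endian target a post-indexed v16i8 load whose increment is 16
// selects LD1Onev16b_POST with XZR standing in for the immediate offset.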
1750
1751void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1752 unsigned SubRegIdx) {
1753 SDLoc dl(N);
1754 EVT VT = N->getValueType(0);
1755 SDValue Chain = N->getOperand(0);
1756
1757 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1758 Chain};
1759
1760 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1761
1762 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1763 SDValue SuperReg = SDValue(Ld, 0);
1764 for (unsigned i = 0; i < NumVecs; ++i)
1765 ReplaceUses(SDValue(N, i),
1766 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1767
1768 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1769
1770 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1771 // because it's too simple to have needed special treatment during lowering.
1772 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1773 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1774 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1775 }
1776
1777 CurDAG->RemoveDeadNode(N);
1778}
1779
1780void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1781 unsigned Opc, unsigned SubRegIdx) {
1782 SDLoc dl(N);
1783 EVT VT = N->getValueType(0);
1784 SDValue Chain = N->getOperand(0);
1785
1786 SDValue Ops[] = {N->getOperand(1), // Mem operand
1787 N->getOperand(2), // Incremental
1788 Chain};
1789
1790 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1791 MVT::Untyped, MVT::Other};
1792
1793 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1794
1795 // Update uses of write back register
1796 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1797
1798 // Update uses of vector list
1799 SDValue SuperReg = SDValue(Ld, 1);
1800 if (NumVecs == 1)
1801 ReplaceUses(SDValue(N, 0), SuperReg);
1802 else
1803 for (unsigned i = 0; i < NumVecs; ++i)
1804 ReplaceUses(SDValue(N, i),
1805 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1806
1807 // Update the chain
1808 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1809 CurDAG->RemoveDeadNode(N);
1810}
1811
1812/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1813/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1814/// new Base and an SDValue representing the new offset.
1815std::tuple<unsigned, SDValue, SDValue>
1816AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1817 unsigned Opc_ri,
1818 const SDValue &OldBase,
1819 const SDValue &OldOffset,
1820 unsigned Scale) {
1821 SDValue NewBase = OldBase;
1822 SDValue NewOffset = OldOffset;
1823 // Detect a possible Reg+Imm addressing mode.
1824 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1825 N, OldBase, NewBase, NewOffset);
1826
1827 // Detect a possible reg+reg addressing mode, but only if we haven't already
1828 // detected a Reg+Imm one.
1829 const bool IsRegReg =
1830 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1831
1832 // Select the instruction.
1833 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1834}
1835
1836enum class SelectTypeKind {
1837 Int1 = 0,
1838 Int = 1,
1839 FP = 2,
1840 AnyType = 3,
1841};
1842
1843 /// This function selects an opcode from a list of opcodes, which is
1844 /// expected to contain the opcodes for the { 8-bit, 16-bit, 32-bit, 64-bit }
1845 /// element types, in that order.
1846template <SelectTypeKind Kind>
1847static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1848 // Only match scalable vector VTs
1849 if (!VT.isScalableVector())
1850 return 0;
1851
1852 EVT EltVT = VT.getVectorElementType();
1853 unsigned Key = VT.getVectorMinNumElements();
1854 switch (Kind) {
1855 case SelectTypeKind::AnyType:
1856 break;
1857 case SelectTypeKind::Int:
1858 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1859 EltVT != MVT::i64)
1860 return 0;
1861 break;
1862 case SelectTypeKind::Int1:
1863 if (EltVT != MVT::i1)
1864 return 0;
1865 break;
1866 case SelectTypeKind::FP:
1867 if (EltVT == MVT::bf16)
1868 Key = 16;
1869 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1870 EltVT != MVT::f64)
1871 return 0;
1872 break;
1873 }
1874
1875 unsigned Offset;
1876 switch (Key) {
1877 case 16: // 8-bit or bf16
1878 Offset = 0;
1879 break;
1880 case 8: // 16-bit
1881 Offset = 1;
1882 break;
1883 case 4: // 32-bit
1884 Offset = 2;
1885 break;
1886 case 2: // 64-bit
1887 Offset = 3;
1888 break;
1889 default:
1890 return 0;
1891 }
1892
1893 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1894}
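// Illustrative mapping: with Kind == Int, nxv16i8 selects Opcodes[0],
// nxv8i16 -> Opcodes[1], nxv4i32 -> Opcodes[2] and nxv2i64 -> Opcodes[3].
// With Kind == FP, bf16 vectors reuse the first slot (Key is forced to 16
// above), so nxv8bf16 -> Opcodes[0] while nxv8f16 -> Opcodes[1],
// nxv4f32 -> Opcodes[2] and nxv2f64 -> Opcodes[3].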
1895
1896// This function is almost identical to SelectWhilePair, but has an
1897// extra check on the range of the immediate operand.
1898// TODO: Merge these two functions together at some point?
1899void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1900 // Immediate can be either 0 or 1.
1901 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1902 if (Imm->getZExtValue() > 1)
1903 return;
1904
1905 SDLoc DL(N);
1906 EVT VT = N->getValueType(0);
1907 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1908 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1909 SDValue SuperReg = SDValue(WhilePair, 0);
1910
1911 for (unsigned I = 0; I < 2; ++I)
1912 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1913 AArch64::psub0 + I, DL, VT, SuperReg));
1914
1915 CurDAG->RemoveDeadNode(N);
1916}
1917
1918void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1919 SDLoc DL(N);
1920 EVT VT = N->getValueType(0);
1921
1922 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1923
1924 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1925 SDValue SuperReg = SDValue(WhilePair, 0);
1926
1927 for (unsigned I = 0; I < 2; ++I)
1928 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1929 AArch64::psub0 + I, DL, VT, SuperReg));
1930
1931 CurDAG->RemoveDeadNode(N);
1932}
1933
1934void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1935 unsigned Opcode) {
1936 EVT VT = N->getValueType(0);
1937 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1938 SDValue Ops = createZTuple(Regs);
1939 SDLoc DL(N);
1940 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1941 SDValue SuperReg = SDValue(Intrinsic, 0);
1942 for (unsigned i = 0; i < NumVecs; ++i)
1943 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1944 AArch64::zsub0 + i, DL, VT, SuperReg));
1945
1946 CurDAG->RemoveDeadNode(N);
1947}
1948
1949void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1950 unsigned Opcode) {
1951 SDLoc DL(N);
1952 EVT VT = N->getValueType(0);
1953 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1954 Ops.push_back(/*Chain*/ N->getOperand(0));
1955
1956 SDNode *Instruction =
1957 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1958 SDValue SuperReg = SDValue(Instruction, 0);
1959
1960 for (unsigned i = 0; i < NumVecs; ++i)
1961 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1962 AArch64::zsub0 + i, DL, VT, SuperReg));
1963
1964 // Copy chain
1965 unsigned ChainIdx = NumVecs;
1966 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1967 CurDAG->RemoveDeadNode(N);
1968}
1969
1970void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1971 unsigned NumVecs,
1972 bool IsZmMulti,
1973 unsigned Opcode,
1974 bool HasPred) {
1975 assert(Opcode != 0 && "Unexpected opcode");
1976
1977 SDLoc DL(N);
1978 EVT VT = N->getValueType(0);
1979 unsigned FirstVecIdx = HasPred ? 2 : 1;
1980
1981 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1982 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1983 return createZMulTuple(Regs);
1984 };
1985
1986 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1987
1988 SDValue Zm;
1989 if (IsZmMulti)
1990 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1991 else
1992 Zm = N->getOperand(NumVecs + FirstVecIdx);
1993
1994 SDNode *Intrinsic;
1995 if (HasPred)
1996 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1997 N->getOperand(1), Zdn, Zm);
1998 else
1999 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
2000 SDValue SuperReg = SDValue(Intrinsic, 0);
2001 for (unsigned i = 0; i < NumVecs; ++i)
2002 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2003 AArch64::zsub0 + i, DL, VT, SuperReg));
2004
2005 CurDAG->RemoveDeadNode(N);
2006}
2007
2008void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2009 unsigned Scale, unsigned Opc_ri,
2010 unsigned Opc_rr, bool IsIntr) {
2011 assert(Scale < 5 && "Invalid scaling value.");
2012 SDLoc DL(N);
2013 EVT VT = N->getValueType(0);
2014 SDValue Chain = N->getOperand(0);
2015
2016 // Optimize addressing mode.
2017 SDValue Base, Offset;
2018 unsigned Opc;
2019 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2020 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2021 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2022
2023 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2024 Base, // Memory operand
2025 Offset, Chain};
2026
2027 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2028
2029 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2030 SDValue SuperReg = SDValue(Load, 0);
2031 for (unsigned i = 0; i < NumVecs; ++i)
2032 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2033 AArch64::zsub0 + i, DL, VT, SuperReg));
2034
2035 // Copy chain
2036 unsigned ChainIdx = NumVecs;
2037 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2038 CurDAG->RemoveDeadNode(N);
2039}
2040
2041void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2042 unsigned NumVecs,
2043 unsigned Scale,
2044 unsigned Opc_ri,
2045 unsigned Opc_rr) {
2046 assert(Scale < 4 && "Invalid scaling value.");
2047 SDLoc DL(N);
2048 EVT VT = N->getValueType(0);
2049 SDValue Chain = N->getOperand(0);
2050
2051 SDValue PNg = N->getOperand(2);
2052 SDValue Base = N->getOperand(3);
2053 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2054 unsigned Opc;
2055 std::tie(Opc, Base, Offset) =
2056 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2057
2058 SDValue Ops[] = {PNg, // Predicate-as-counter
2059 Base, // Memory operand
2060 Offset, Chain};
2061
2062 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2063
2064 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2065 SDValue SuperReg = SDValue(Load, 0);
2066 for (unsigned i = 0; i < NumVecs; ++i)
2067 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2068 AArch64::zsub0 + i, DL, VT, SuperReg));
2069
2070 // Copy chain
2071 unsigned ChainIdx = NumVecs;
2072 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2073 CurDAG->RemoveDeadNode(N);
2074}
2075
2076void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2077 unsigned Opcode) {
2078 if (N->getValueType(0) != MVT::nxv4f32)
2079 return;
2080 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2081}
2082
2083void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2084 unsigned NumOutVecs,
2085 unsigned Opc,
2086 uint32_t MaxImm) {
2087 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2088 if (Imm->getZExtValue() > MaxImm)
2089 return;
2090
2091 SDValue ZtValue;
2092 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2093 return;
2094
2095 SDValue Chain = Node->getOperand(0);
2096 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2097 SDLoc DL(Node);
2098 EVT VT = Node->getValueType(0);
2099
2100 SDNode *Instruction =
2101 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2102 SDValue SuperReg = SDValue(Instruction, 0);
2103
2104 for (unsigned I = 0; I < NumOutVecs; ++I)
2105 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2106 AArch64::zsub0 + I, DL, VT, SuperReg));
2107
2108 // Copy chain
2109 unsigned ChainIdx = NumOutVecs;
2110 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2111 CurDAG->RemoveDeadNode(Node);
2112}
2113
2114void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2115 unsigned NumOutVecs,
2116 unsigned Opc) {
2117 SDValue ZtValue;
2118 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2119 return;
2120
2121 SDValue Chain = Node->getOperand(0);
2122 SDValue Ops[] = {ZtValue,
2123 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2124 Chain};
2125
2126 SDLoc DL(Node);
2127 EVT VT = Node->getValueType(0);
2128
2129 SDNode *Instruction =
2130 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2131 SDValue SuperReg = SDValue(Instruction, 0);
2132
2133 for (unsigned I = 0; I < NumOutVecs; ++I)
2134 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2135 AArch64::zsub0 + I, DL, VT, SuperReg));
2136
2137 // Copy chain
2138 unsigned ChainIdx = NumOutVecs;
2139 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2140 CurDAG->RemoveDeadNode(Node);
2141}
2142
2143void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2144 unsigned Op) {
2145 SDLoc DL(N);
2146 EVT VT = N->getValueType(0);
2147
2148 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2149 SDValue Zd = createZMulTuple(Regs);
2150 SDValue Zn = N->getOperand(1 + NumVecs);
2151 SDValue Zm = N->getOperand(2 + NumVecs);
2152
2153 SDValue Ops[] = {Zd, Zn, Zm};
2154
2155 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2156 SDValue SuperReg = SDValue(Intrinsic, 0);
2157 for (unsigned i = 0; i < NumVecs; ++i)
2158 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2159 AArch64::zsub0 + i, DL, VT, SuperReg));
2160
2161 CurDAG->RemoveDeadNode(N);
2162}
2163
2164bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2165 switch (BaseReg) {
2166 default:
2167 return false;
2168 case AArch64::ZA:
2169 case AArch64::ZAB0:
2170 if (TileNum == 0)
2171 break;
2172 return false;
2173 case AArch64::ZAH0:
2174 if (TileNum <= 1)
2175 break;
2176 return false;
2177 case AArch64::ZAS0:
2178 if (TileNum <= 3)
2179 break;
2180 return false;
2181 case AArch64::ZAD0:
2182 if (TileNum <= 7)
2183 break;
2184 return false;
2185 }
2186
2187 BaseReg += TileNum;
2188 return true;
2189}
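// Illustrative uses: BaseReg == AArch64::ZAS0 with TileNum == 2 rewrites
// BaseReg to the register for tile 2 (ZAS2), whereas ZAH0 with TileNum == 2 is
// rejected because only the tiles ZAH0 and ZAH1 exist for 16-bit element types.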
2190
2191template <unsigned MaxIdx, unsigned Scale>
2192void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2193 unsigned BaseReg, unsigned Op) {
2194 unsigned TileNum = 0;
2195 if (BaseReg != AArch64::ZA)
2196 TileNum = N->getConstantOperandVal(2);
2197
2198 if (!SelectSMETile(BaseReg, TileNum))
2199 return;
2200
2201 SDValue SliceBase, Base, Offset;
2202 if (BaseReg == AArch64::ZA)
2203 SliceBase = N->getOperand(2);
2204 else
2205 SliceBase = N->getOperand(3);
2206
2207 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2208 return;
2209
2210 SDLoc DL(N);
2211 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2212 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2213 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2214
2215 EVT VT = N->getValueType(0);
2216 for (unsigned I = 0; I < NumVecs; ++I)
2217 ReplaceUses(SDValue(N, I),
2218 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2219 SDValue(Mov, 0)));
2220 // Copy chain
2221 unsigned ChainIdx = NumVecs;
2222 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2223 CurDAG->RemoveDeadNode(N);
2224}
2225
2226void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2227 unsigned Op, unsigned MaxIdx,
2228 unsigned Scale, unsigned BaseReg) {
2229 // The slice operand can appear in different positions:
2230 // Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2231 // Tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2232 SDValue SliceBase = N->getOperand(2);
2233 if (BaseReg != AArch64::ZA)
2234 SliceBase = N->getOperand(3);
2235
2236 SDValue Base, Offset;
2237 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2238 return;
2239 // The correct ZA tile number is computed when the machine instruction is
2240 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2241 // register with ZReg.
2242 SDLoc DL(N);
2243 SmallVector<SDValue, 6> Ops;
2244 if (BaseReg != AArch64::ZA)
2245 Ops.push_back(N->getOperand(2));
2246 Ops.push_back(Base);
2247 Ops.push_back(Offset);
2248 Ops.push_back(N->getOperand(0)); //Chain
2249 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2250
2251 EVT VT = N->getValueType(0);
2252 for (unsigned I = 0; I < NumVecs; ++I)
2253 ReplaceUses(SDValue(N, I),
2254 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2255 SDValue(Mov, 0)));
2256
2257 // Copy chain
2258 unsigned ChainIdx = NumVecs;
2259 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2260 CurDAG->RemoveDeadNode(N);
2261}
2262
2263void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2264 unsigned NumOutVecs,
2265 bool IsTupleInput,
2266 unsigned Opc) {
2267 SDLoc DL(N);
2268 EVT VT = N->getValueType(0);
2269 unsigned NumInVecs = N->getNumOperands() - 1;
2270
2271 SmallVector<SDValue, 6> Ops;
2272 if (IsTupleInput) {
2273 assert((NumInVecs == 2 || NumInVecs == 4) &&
2274 "Don't know how to handle multi-register input!");
2275 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2276 Ops.push_back(createZMulTuple(Regs));
2277 } else {
2278 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2279 for (unsigned I = 0; I < NumInVecs; I++)
2280 Ops.push_back(N->getOperand(1 + I));
2281 }
2282
2283 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2284 SDValue SuperReg = SDValue(Res, 0);
2285
2286 for (unsigned I = 0; I < NumOutVecs; I++)
2287 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2288 AArch64::zsub0 + I, DL, VT, SuperReg));
2289 CurDAG->RemoveDeadNode(N);
2290}
2291
2292void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2293 unsigned Opc) {
2294 SDLoc dl(N);
2295 EVT VT = N->getOperand(2)->getValueType(0);
2296
2297 // Form a REG_SEQUENCE to force register allocation.
2298 bool Is128Bit = VT.getSizeInBits() == 128;
2299 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2300 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2301
2302 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2303 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2304
2305 // Transfer memoperands.
2306 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2307 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2308
2309 ReplaceNode(N, St);
2310}
2311
2312void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2313 unsigned Scale, unsigned Opc_rr,
2314 unsigned Opc_ri) {
2315 SDLoc dl(N);
2316
2317 // Form a REG_SEQUENCE to force register allocation.
2318 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2319 SDValue RegSeq = createZTuple(Regs);
2320
2321 // Optimize addressing mode.
2322 unsigned Opc;
2323 SDValue Base, Offset;
2324 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2325 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2326 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2327
2328 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2329 Base, // address
2330 Offset, // offset
2331 N->getOperand(0)}; // chain
2332 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2333
2334 ReplaceNode(N, St);
2335}
2336
2337bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2338 SDValue &OffImm) {
2339 SDLoc dl(N);
2340 const DataLayout &DL = CurDAG->getDataLayout();
2341 const TargetLowering *TLI = getTargetLowering();
2342
2343 // Try to match it for the frame address
2344 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2345 int FI = FINode->getIndex();
2346 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2347 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2348 return true;
2349 }
2350
2351 return false;
2352}
2353
2354void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2355 unsigned Opc) {
2356 SDLoc dl(N);
2357 EVT VT = N->getOperand(2)->getValueType(0);
2358 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2359 MVT::Other}; // Type for the Chain
2360
2361 // Form a REG_SEQUENCE to force register allocation.
2362 bool Is128Bit = VT.getSizeInBits() == 128;
2363 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2364 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2365
2366 SDValue Ops[] = {RegSeq,
2367 N->getOperand(NumVecs + 1), // base register
2368 N->getOperand(NumVecs + 2), // Incremental
2369 N->getOperand(0)}; // Chain
2370 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2371
2372 ReplaceNode(N, St);
2373}
2374
2375namespace {
2376/// WidenVector - Given a value in the V64 register class, produce the
2377/// equivalent value in the V128 register class.
2378class WidenVector {
2379 SelectionDAG &DAG;
2380
2381public:
2382 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2383
2384 SDValue operator()(SDValue V64Reg) {
2385 EVT VT = V64Reg.getValueType();
2386 unsigned NarrowSize = VT.getVectorNumElements();
2387 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2388 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2389 SDLoc DL(V64Reg);
2390
2391 SDValue Undef =
2392 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2393 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2394 }
2395};
2396} // namespace
2397
2398/// NarrowVector - Given a value in the V128 register class, produce the
2399/// equivalent value in the V64 register class.
2400 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2401 EVT VT = V128Reg.getValueType();
2402 unsigned WideSize = VT.getVectorNumElements();
2403 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2404 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2405
2406 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2407 V128Reg);
2408}
2409
2410void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2411 unsigned Opc) {
2412 SDLoc dl(N);
2413 EVT VT = N->getValueType(0);
2414 bool Narrow = VT.getSizeInBits() == 64;
2415
2416 // Form a REG_SEQUENCE to force register allocation.
2417 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2418
2419 if (Narrow)
2420 transform(Regs, Regs.begin(),
2421 WidenVector(*CurDAG));
2422
2423 SDValue RegSeq = createQTuple(Regs);
2424
2425 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2426
2427 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2428
2429 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2430 N->getOperand(NumVecs + 3), N->getOperand(0)};
2431 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2432 SDValue SuperReg = SDValue(Ld, 0);
2433
2434 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2435 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2436 AArch64::qsub2, AArch64::qsub3 };
2437 for (unsigned i = 0; i < NumVecs; ++i) {
2438 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2439 if (Narrow)
2440 NV = NarrowVector(NV, *CurDAG);
2441 ReplaceUses(SDValue(N, i), NV);
2442 }
2443
2444 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2445 CurDAG->RemoveDeadNode(N);
2446}
2447
2448void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2449 unsigned Opc) {
2450 SDLoc dl(N);
2451 EVT VT = N->getValueType(0);
2452 bool Narrow = VT.getSizeInBits() == 64;
2453
2454 // Form a REG_SEQUENCE to force register allocation.
2455 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2456
2457 if (Narrow)
2458 transform(Regs, Regs.begin(),
2459 WidenVector(*CurDAG));
2460
2461 SDValue RegSeq = createQTuple(Regs);
2462
2463 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2464 RegSeq->getValueType(0), MVT::Other};
2465
2466 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2467
2468 SDValue Ops[] = {RegSeq,
2469 CurDAG->getTargetConstant(LaneNo, dl,
2470 MVT::i64), // Lane Number
2471 N->getOperand(NumVecs + 2), // Base register
2472 N->getOperand(NumVecs + 3), // Incremental
2473 N->getOperand(0)};
2474 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2475
2476 // Update uses of the write back register
2477 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2478
2479 // Update uses of the vector list
2480 SDValue SuperReg = SDValue(Ld, 1);
2481 if (NumVecs == 1) {
2482 ReplaceUses(SDValue(N, 0),
2483 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2484 } else {
2485 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2486 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2487 AArch64::qsub2, AArch64::qsub3 };
2488 for (unsigned i = 0; i < NumVecs; ++i) {
2489 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2490 SuperReg);
2491 if (Narrow)
2492 NV = NarrowVector(NV, *CurDAG);
2493 ReplaceUses(SDValue(N, i), NV);
2494 }
2495 }
2496
2497 // Update the Chain
2498 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2499 CurDAG->RemoveDeadNode(N);
2500}
2501
2502void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2503 unsigned Opc) {
2504 SDLoc dl(N);
2505 EVT VT = N->getOperand(2)->getValueType(0);
2506 bool Narrow = VT.getSizeInBits() == 64;
2507
2508 // Form a REG_SEQUENCE to force register allocation.
2509 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2510
2511 if (Narrow)
2512 transform(Regs, Regs.begin(),
2513 WidenVector(*CurDAG));
2514
2515 SDValue RegSeq = createQTuple(Regs);
2516
2517 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2518
2519 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2520 N->getOperand(NumVecs + 3), N->getOperand(0)};
2521 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2522
2523 // Transfer memoperands.
2524 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2525 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2526
2527 ReplaceNode(N, St);
2528}
2529
2530void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2531 unsigned Opc) {
2532 SDLoc dl(N);
2533 EVT VT = N->getOperand(2)->getValueType(0);
2534 bool Narrow = VT.getSizeInBits() == 64;
2535
2536 // Form a REG_SEQUENCE to force register allocation.
2537 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2538
2539 if (Narrow)
2540 transform(Regs, Regs.begin(),
2541 WidenVector(*CurDAG));
2542
2543 SDValue RegSeq = createQTuple(Regs);
2544
2545 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2546 MVT::Other};
2547
2548 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2549
2550 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2551 N->getOperand(NumVecs + 2), // Base Register
2552 N->getOperand(NumVecs + 3), // Incremental
2553 N->getOperand(0)};
2554 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2555
2556 // Transfer memoperands.
2557 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2558 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2559
2560 ReplaceNode(N, St);
2561}
2562
2563 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2564 unsigned &Opc, SDValue &Opd0,
2565 unsigned &LSB, unsigned &MSB,
2566 unsigned NumberOfIgnoredLowBits,
2567 bool BiggerPattern) {
2568 assert(N->getOpcode() == ISD::AND &&
2569 "N must be an AND operation to call this function");
2570
2571 EVT VT = N->getValueType(0);
2572
2573 // Here we could test the type of VT and return false when it does not
2574 // match, but since that check has already been done by the caller in the
2575 // current context, we turn it into an assert to avoid redundant code.
2576 assert((VT == MVT::i32 || VT == MVT::i64) &&
2577 "Type checking must have been done before calling this function");
2578
2579 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2580 // changed the AND node to a 32-bit mask operation. We'll have to
2581 // undo that as part of the transform here if we want to catch all
2582 // the opportunities.
2583 // Currently the NumberOfIgnoredLowBits argument helps to recover
2584 // from these situations when matching bigger pattern (bitfield insert).
2585
2586 // For unsigned extracts, check for a shift right and mask
2587 uint64_t AndImm = 0;
2588 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2589 return false;
2590
2591 const SDNode *Op0 = N->getOperand(0).getNode();
2592
2593 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2594 // simplified. Try to undo that
2595 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2596
2597 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2598 if (AndImm & (AndImm + 1))
2599 return false;
2600
2601 bool ClampMSB = false;
2602 uint64_t SrlImm = 0;
2603 // Handle the SRL + ANY_EXTEND case.
2604 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2605 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2606 // Extend the incoming operand of the SRL to 64-bit.
2607 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2608 // Make sure to clamp the MSB so that we preserve the semantics of the
2609 // original operations.
2610 ClampMSB = true;
2611 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2612 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2613 SrlImm)) {
2614 // If the shift result was truncated, we can still combine them.
2615 Opd0 = Op0->getOperand(0).getOperand(0);
2616
2617 // Use the type of SRL node.
2618 VT = Opd0->getValueType(0);
2619 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2620 Opd0 = Op0->getOperand(0);
2621 ClampMSB = (VT == MVT::i32);
2622 } else if (BiggerPattern) {
2623 // Let's pretend a 0 shift right has been performed.
2624 // The resulting code will be at least as good as the original one
2625 // plus it may expose more opportunities for bitfield insert pattern.
2626 // FIXME: Currently we limit this to the bigger pattern, because
2627 // some optimizations expect AND and not UBFM.
2628 Opd0 = N->getOperand(0);
2629 } else
2630 return false;
2631
2632 // Bail out on large immediates. This happens when no proper
2633 // combining/constant folding was performed.
2634 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2635 LLVM_DEBUG(
2636 (dbgs() << N
2637 << ": Found large shift immediate, this should not happen\n"));
2638 return false;
2639 }
2640
2641 LSB = SrlImm;
2642 MSB = SrlImm +
2643 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2644 : llvm::countr_one<uint64_t>(AndImm)) -
2645 1;
2646 if (ClampMSB)
2647 // Since we're moving the extend before the right shift operation, we need
2648 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2649 // the zeros which would get shifted in with the original right shift
2650 // operation.
2651 MSB = MSB > 31 ? 31 : MSB;
2652
2653 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2654 return true;
2655}
2656
2657 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2658 SDValue &Opd0, unsigned &Immr,
2659 unsigned &Imms) {
2660 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2661
2662 EVT VT = N->getValueType(0);
2663 unsigned BitWidth = VT.getSizeInBits();
2664 assert((VT == MVT::i32 || VT == MVT::i64) &&
2665 "Type checking must have been done before calling this function");
2666
2667 SDValue Op = N->getOperand(0);
2668 if (Op->getOpcode() == ISD::TRUNCATE) {
2669 Op = Op->getOperand(0);
2670 VT = Op->getValueType(0);
2671 BitWidth = VT.getSizeInBits();
2672 }
2673
2674 uint64_t ShiftImm;
2675 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2676 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2677 return false;
2678
2679 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2680 if (ShiftImm + Width > BitWidth)
2681 return false;
2682
2683 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2684 Opd0 = Op.getOperand(0);
2685 Immr = ShiftImm;
2686 Imms = ShiftImm + Width - 1;
2687 return true;
2688}
2689
2690 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2691 SDValue &Opd0, unsigned &LSB,
2692 unsigned &MSB) {
2693 // We are looking for the following pattern, which extracts a run of
2694 // contiguous bits from the source value and places them at the LSB of the
2695 // destination value; all other bits of the destination value are set to zero:
2696 //
2697 // Value2 = AND Value, MaskImm
2698 // SRL Value2, ShiftImm
2699 //
2700 // where MaskImm >> ShiftImm determines the width of the extracted bit field.
2701 //
2702 // This gets selected into a single UBFM:
2703 //
2704 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2705 //
2706
2707 if (N->getOpcode() != ISD::SRL)
2708 return false;
2709
2710 uint64_t AndMask = 0;
2711 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2712 return false;
2713
2714 Opd0 = N->getOperand(0).getOperand(0);
2715
2716 uint64_t SrlImm = 0;
2717 if (!isIntImmediate(N->getOperand(1), SrlImm))
2718 return false;
2719
2720 // Check whether we really have several bits extract here.
2721 if (!isMask_64(AndMask >> SrlImm))
2722 return false;
2723
2724 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2725 LSB = SrlImm;
2726 MSB = llvm::Log2_64(AndMask);
2727 return true;
2728}
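// Illustrative case: (srl (and X, 0xfff0), 4) has AndMask == 0xfff0 and
// SrlImm == 4; AndMask >> SrlImm == 0xfff is a mask, so the pair is selected
// as UBFM X, #4, #15, i.e. an unsigned extract of bits [15:4].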
2729
2730static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2731 unsigned &Immr, unsigned &Imms,
2732 bool BiggerPattern) {
2733 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2734 "N must be a SHR/SRA operation to call this function");
2735
2736 EVT VT = N->getValueType(0);
2737
2738 // Here we could test the type of VT and return false when it does not
2739 // match, but since that check has already been done by the caller in the
2740 // current context, we turn it into an assert to avoid redundant code.
2741 assert((VT == MVT::i32 || VT == MVT::i64) &&
2742 "Type checking must have been done before calling this function");
2743
2744 // Check for AND + SRL doing several bits extract.
2745 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2746 return true;
2747
2748 // We're looking for a shift of a shift.
2749 uint64_t ShlImm = 0;
2750 uint64_t TruncBits = 0;
2751 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2752 Opd0 = N->getOperand(0).getOperand(0);
2753 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2754 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2755 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2756 // be considered as setting high 32 bits as zero. Our strategy here is to
2757 // always generate 64bit UBFM. This consistency will help the CSE pass
2758 // later find more redundancy.
2759 Opd0 = N->getOperand(0).getOperand(0);
2760 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2761 VT = Opd0.getValueType();
2762 assert(VT == MVT::i64 && "the promoted type should be i64");
2763 } else if (BiggerPattern) {
2764 // Let's pretend a 0 shift left has been performed.
2765 // FIXME: Currently we limit this to the bigger pattern case,
2766 // because some optimizations expect AND and not UBFM
2767 Opd0 = N->getOperand(0);
2768 } else
2769 return false;
2770
2771 // Missing combines/constant folding may have left us with strange
2772 // constants.
2773 if (ShlImm >= VT.getSizeInBits()) {
2774 LLVM_DEBUG(
2775 (dbgs() << N
2776 << ": Found large shift immediate, this should not happen\n"));
2777 return false;
2778 }
2779
2780 uint64_t SrlImm = 0;
2781 if (!isIntImmediate(N->getOperand(1), SrlImm))
2782 return false;
2783
2784 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2785 "bad amount in shift node!");
2786 int immr = SrlImm - ShlImm;
2787 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2788 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2789 // SRA requires a signed extraction
2790 if (VT == MVT::i32)
2791 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2792 else
2793 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2794 return true;
2795}
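// Illustrative case of the shift-of-shift path: for i32, (srl (shl X, 24), 28)
// yields Immr = 28 - 24 = 4 and Imms = 32 - 24 - 1 = 7, i.e. UBFMWri X, #4, #7,
// which extracts bits [7:4] of X.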
2796
2797bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2798 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2799
2800 EVT VT = N->getValueType(0);
2801 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2802 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2803 return false;
2804
2805 uint64_t ShiftImm;
2806 SDValue Op = N->getOperand(0);
2807 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2808 return false;
2809
2810 SDLoc dl(N);
2811 // Extend the incoming operand of the shift to 64-bits.
2812 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2813 unsigned Immr = ShiftImm;
2814 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2815 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2816 CurDAG->getTargetConstant(Imms, dl, VT)};
2817 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2818 return true;
2819}
2820
2821static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2822 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2823 unsigned NumberOfIgnoredLowBits = 0,
2824 bool BiggerPattern = false) {
2825 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2826 return false;
2827
2828 switch (N->getOpcode()) {
2829 default:
2830 if (!N->isMachineOpcode())
2831 return false;
2832 break;
2833 case ISD::AND:
2834 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2835 NumberOfIgnoredLowBits, BiggerPattern);
2836 case ISD::SRL:
2837 case ISD::SRA:
2838 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2839
2840 case ISD::SIGN_EXTEND_INREG:
2841 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2842 }
2843
2844 unsigned NOpc = N->getMachineOpcode();
2845 switch (NOpc) {
2846 default:
2847 return false;
2848 case AArch64::SBFMWri:
2849 case AArch64::UBFMWri:
2850 case AArch64::SBFMXri:
2851 case AArch64::UBFMXri:
2852 Opc = NOpc;
2853 Opd0 = N->getOperand(0);
2854 Immr = N->getConstantOperandVal(1);
2855 Imms = N->getConstantOperandVal(2);
2856 return true;
2857 }
2858 // Unreachable
2859 return false;
2860}
2861
2862bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2863 unsigned Opc, Immr, Imms;
2864 SDValue Opd0;
2865 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2866 return false;
2867
2868 EVT VT = N->getValueType(0);
2869 SDLoc dl(N);
2870
2871 // If the bit extract operation is 64bit but the original type is 32bit, we
2872 // need to add one EXTRACT_SUBREG.
2873 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2874 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2875 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2876
2877 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2878 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2879 MVT::i32, SDValue(BFM, 0));
2880 ReplaceNode(N, Inner.getNode());
2881 return true;
2882 }
2883
2884 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2885 CurDAG->getTargetConstant(Imms, dl, VT)};
2886 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2887 return true;
2888}
2889
2890/// Does DstMask form a complementary pair with the mask provided by
2891/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2892/// this asks whether DstMask zeroes precisely those bits that will be set by
2893/// the other half.
2894static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2895 unsigned NumberOfIgnoredHighBits, EVT VT) {
2896 assert((VT == MVT::i32 || VT == MVT::i64) &&
2897 "i32 or i64 mask type expected!");
2898 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2899
2900 // Enable implicitTrunc as we're intentionally ignoring high bits.
2901 APInt SignificantDstMask =
2902 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2903 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2904
2905 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2906 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2907}
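// Illustrative case for i32 with no ignored high bits: DstMask == 0xffff0000
// and BitsToBeInserted == 0x0000ffff form a complementary pair, while
// DstMask == 0xffff00ff does not, since the mask fails to clear bits [7:0]
// that the insertion would set.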
2908
2909 // Look for bits that will be useful for later uses.
2910 // A bit is considered useless as soon as it is dropped and was never used
2911 // before being dropped.
2912 // E.g., looking for the useful bits of x:
2913 // 1. y = x & 0x7
2914 // 2. z = y >> 2
2915 // After #1, the useful bits of x are 0x7, and those useful bits live on
2916 // through y.
2917 // After #2, the useful bits of x are 0x4.
2918 // However, if x is used by an unpredictable instruction, then all its bits
2919 // are useful.
2920 // E.g.
2921 // 1. y = x & 0x7
2922 // 2. z = y >> 2
2923 // 3. str x, [@x]   <- here x escapes as a whole, so all of its bits stay useful.
2924static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2925
2926 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2927 unsigned Depth) {
2928 uint64_t Imm =
2929 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2930 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2931 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2932 getUsefulBits(Op, UsefulBits, Depth + 1);
2933}
2934
2935 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2936 uint64_t Imm, uint64_t MSB,
2937 unsigned Depth) {
2938 // inherit the bitwidth value
2939 APInt OpUsefulBits(UsefulBits);
2940 OpUsefulBits = 1;
2941
2942 if (MSB >= Imm) {
2943 OpUsefulBits <<= MSB - Imm + 1;
2944 --OpUsefulBits;
2945 // The interesting part will be in the lower part of the result
2946 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2947 // The interesting part was starting at Imm in the argument
2948 OpUsefulBits <<= Imm;
2949 } else {
2950 OpUsefulBits <<= MSB + 1;
2951 --OpUsefulBits;
2952 // The interesting part will be shifted in the result
2953 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2954 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2955 // The interesting part was at zero in the argument
2956 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2957 }
2958
2959 UsefulBits &= OpUsefulBits;
2960}
2961
2962static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2963 unsigned Depth) {
2964 uint64_t Imm =
2965 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2966 uint64_t MSB =
2967 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2968
2969 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2970}
2971
2972 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2973 unsigned Depth) {
2974 uint64_t ShiftTypeAndValue =
2975 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2976 APInt Mask(UsefulBits);
2977 Mask.clearAllBits();
2978 Mask.flipAllBits();
2979
2980 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2981 // Shift Left
2982 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2983 Mask <<= ShiftAmt;
2984 getUsefulBits(Op, Mask, Depth + 1);
2985 Mask.lshrInPlace(ShiftAmt);
2986 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2987 // Shift Right
2988 // We do not handle AArch64_AM::ASR, because the sign will change the
2989 // number of useful bits
2990 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2991 Mask.lshrInPlace(ShiftAmt);
2992 getUsefulBits(Op, Mask, Depth + 1);
2993 Mask <<= ShiftAmt;
2994 } else
2995 return;
2996
2997 UsefulBits &= Mask;
2998}
2999
3000static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3001 unsigned Depth) {
3002 uint64_t Imm =
3003 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3004 uint64_t MSB =
3005 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3006
3007 APInt OpUsefulBits(UsefulBits);
3008 OpUsefulBits = 1;
3009
3010 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3011 ResultUsefulBits.flipAllBits();
3012 APInt Mask(UsefulBits.getBitWidth(), 0);
3013
3014 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3015
3016 if (MSB >= Imm) {
3017 // The instruction is a BFXIL.
3018 uint64_t Width = MSB - Imm + 1;
3019 uint64_t LSB = Imm;
3020
3021 OpUsefulBits <<= Width;
3022 --OpUsefulBits;
3023
3024 if (Op.getOperand(1) == Orig) {
3025 // Copy the low bits from the result to bits starting from LSB.
3026 Mask = ResultUsefulBits & OpUsefulBits;
3027 Mask <<= LSB;
3028 }
3029
3030 if (Op.getOperand(0) == Orig)
3031 // Bits starting from LSB in the input contribute to the result.
3032 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3033 } else {
3034 // The instruction is a BFI.
3035 uint64_t Width = MSB + 1;
3036 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3037
3038 OpUsefulBits <<= Width;
3039 --OpUsefulBits;
3040 OpUsefulBits <<= LSB;
3041
3042 if (Op.getOperand(1) == Orig) {
3043 // Copy the bits from the result to the zero bits.
3044 Mask = ResultUsefulBits & OpUsefulBits;
3045 Mask.lshrInPlace(LSB);
3046 }
3047
3048 if (Op.getOperand(0) == Orig)
3049 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3050 }
3051
3052 UsefulBits &= Mask;
3053}
3054
3055static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3056 SDValue Orig, unsigned Depth) {
3057
3058 // Users of this node should have already been instruction selected
3059 // FIXME: Can we turn that into an assert?
3060 if (!UserNode->isMachineOpcode())
3061 return;
3062
3063 switch (UserNode->getMachineOpcode()) {
3064 default:
3065 return;
3066 case AArch64::ANDSWri:
3067 case AArch64::ANDSXri:
3068 case AArch64::ANDWri:
3069 case AArch64::ANDXri:
3070 // We increment Depth only when we call the getUsefulBits
3071 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3072 Depth);
3073 case AArch64::UBFMWri:
3074 case AArch64::UBFMXri:
3075 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3076
3077 case AArch64::ORRWrs:
3078 case AArch64::ORRXrs:
3079 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3080 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3081 Depth);
3082 return;
3083 case AArch64::BFMWri:
3084 case AArch64::BFMXri:
3085 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3086
3087 case AArch64::STRBBui:
3088 case AArch64::STURBBi:
3089 if (UserNode->getOperand(0) != Orig)
3090 return;
3091 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3092 return;
3093
3094 case AArch64::STRHHui:
3095 case AArch64::STURHHi:
3096 if (UserNode->getOperand(0) != Orig)
3097 return;
3098 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3099 return;
3100 }
3101}
3102
3103static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3104 if (Depth >= SelectionDAG::MaxRecursionDepth)
3105 return;
3106 // Initialize UsefulBits
3107 if (!Depth) {
3108 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3109 // At the beginning, assume every produced bits is useful
3110 UsefulBits = APInt(Bitwidth, 0);
3111 UsefulBits.flipAllBits();
3112 }
3113 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3114
3115 for (SDNode *Node : Op.getNode()->users()) {
3116 // A use cannot produce useful bits
3117 APInt UsefulBitsForUse = APInt(UsefulBits);
3118 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3119 UsersUsefulBits |= UsefulBitsForUse;
3120 }
3121 // UsefulBits contains the produced bits that are meaningful for the
3122 // current definition, thus a user cannot make a bit meaningful at
3123 // this point
3124 UsefulBits &= UsersUsefulBits;
3125}
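// Mirroring the example before the getUsefulBits declaration: if the only
// users of X are "Y = AND X, 0x7" and then a right shift of Y by 2 (selected
// as a UBFM), the useful bits of X narrow to 0x7 through the AND and then to
// 0x4 through the shift; a user that getUsefulBitsForUse does not recognize
// leaves every currently-useful bit marked useful.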
3126
3127/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3128/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3129/// 0, return Op unchanged.
3130static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3131 if (ShlAmount == 0)
3132 return Op;
3133
3134 EVT VT = Op.getValueType();
3135 SDLoc dl(Op);
3136 unsigned BitWidth = VT.getSizeInBits();
3137 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3138
3139 SDNode *ShiftNode;
3140 if (ShlAmount > 0) {
3141 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3142 ShiftNode = CurDAG->getMachineNode(
3143 UBFMOpc, dl, VT, Op,
3144 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3145 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3146 } else {
3147 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3148 assert(ShlAmount < 0 && "expected right shift");
3149 int ShrAmount = -ShlAmount;
3150 ShiftNode = CurDAG->getMachineNode(
3151 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3152 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3153 }
3154
3155 return SDValue(ShiftNode, 0);
3156}
3157
3158// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3159static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3160 bool BiggerPattern,
3161 const uint64_t NonZeroBits,
3162 SDValue &Src, int &DstLSB,
3163 int &Width);
3164
3165// For bit-field-positioning pattern "shl VAL, N)".
3166static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3167 bool BiggerPattern,
3168 const uint64_t NonZeroBits,
3169 SDValue &Src, int &DstLSB,
3170 int &Width);
3171
3172/// Does this tree qualify as an attempt to move a bitfield into position,
3173/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3174 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3175 bool BiggerPattern, SDValue &Src,
3176 int &DstLSB, int &Width) {
3177 EVT VT = Op.getValueType();
3178 unsigned BitWidth = VT.getSizeInBits();
3179 (void)BitWidth;
3180 assert(BitWidth == 32 || BitWidth == 64);
3181
3182 KnownBits Known = CurDAG->computeKnownBits(Op);
3183
3184 // Non-zero in the sense that they're not provably zero, which is the key
3185 // point if we want to use this value
3186 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3187 if (!isShiftedMask_64(NonZeroBits))
3188 return false;
3189
3190 switch (Op.getOpcode()) {
3191 default:
3192 break;
3193 case ISD::AND:
3194 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3195 NonZeroBits, Src, DstLSB, Width);
3196 case ISD::SHL:
3197 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3198 NonZeroBits, Src, DstLSB, Width);
3199 }
3200
3201 return false;
3202}
3203
3204 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3205 bool BiggerPattern,
3206 const uint64_t NonZeroBits,
3207 SDValue &Src, int &DstLSB,
3208 int &Width) {
3209 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3210
3211 EVT VT = Op.getValueType();
3212 assert((VT == MVT::i32 || VT == MVT::i64) &&
3213 "Caller guarantees VT is one of i32 or i64");
3214 (void)VT;
3215
3216 uint64_t AndImm;
3217 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3218 return false;
3219
3220 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3221 // 1) (AndImm & (1 << POS) == 0)
3222 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3223 //
3224 // 1) and 2) don't agree so something must be wrong (e.g., in
3225 // 'SelectionDAG::computeKnownBits')
3226 assert((~AndImm & NonZeroBits) == 0 &&
3227 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3228
3229 SDValue AndOp0 = Op.getOperand(0);
3230
3231 uint64_t ShlImm;
3232 SDValue ShlOp0;
3233 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3234 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3235 ShlOp0 = AndOp0.getOperand(0);
3236 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3237 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3238 ShlImm)) {
3239 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3240
3241 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3242 SDValue ShlVal = AndOp0.getOperand(0);
3243
3244 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3245 // expect VT to be MVT::i32.
3246 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3247
3248 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3249 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3250 } else
3251 return false;
3252
3253 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3254 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3255 // AndOp0+AND.
3256 if (!BiggerPattern && !AndOp0.hasOneUse())
3257 return false;
3258
3259 DstLSB = llvm::countr_zero(NonZeroBits);
3260 Width = llvm::countr_one(NonZeroBits >> DstLSB);
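 // For example, NonZeroBits == 0x0000ff00 gives DstLSB == 8 and Width == 8.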
3261
3262 // Bail out on large Width. This happens when no proper combining / constant
3263 // folding was performed.
3264 if (Width >= (int)VT.getSizeInBits()) {
3265 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3266 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3267 // "val".
3268 // If VT is i32, Width >= 32 means:
3269 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3270 // demands at least 'Width' bits (after dag-combiner). This, together with
3271 // the `any_extend` Op (undefined higher bits), indicates a missed combination
3272 // when lowering the 'and' IR instruction to a machine IR instruction.
3273 LLVM_DEBUG(
3274 dbgs()
3275 << "Found large Width in bit-field-positioning -- this indicates no "
3276 "proper combining / constant folding was performed\n");
3277 return false;
3278 }
3279
3280 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3281 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3282 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3283 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3284 // which case it is not profitable to insert an extra shift.
3285 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3286 return false;
3287
3288 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3289 return true;
3290}
3291
3292// For node (shl (and val, mask), N), returns true if the node is equivalent to
3293// UBFIZ.
3294static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3295 SDValue &Src, int &DstLSB,
3296 int &Width) {
3297 // Caller should have verified that N is a left shift with constant shift
3298 // amount; asserts that.
3299 assert(Op.getOpcode() == ISD::SHL &&
3300 "Op.getNode() should be a SHL node to call this function");
3301 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3302 "Op.getNode() should shift ShlImm to call this function");
3303
3304 uint64_t AndImm = 0;
3305 SDValue Op0 = Op.getOperand(0);
3306 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3307 return false;
3308
3309 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3310 if (isMask_64(ShiftedAndImm)) {
3311 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3312 // should end with Mask, and could be prefixed with random bits if those
3313 // bits are shifted out.
3314 //
3315 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3316 // the AND result corresponding to those bits are shifted out, so it's fine
3317 // to not extract them.
3318 Width = llvm::countr_one(ShiftedAndImm);
3319 DstLSB = ShlImm;
3320 Src = Op0.getOperand(0);
3321 return true;
3322 }
3323 return false;
3324}
3325
3326static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3327 bool BiggerPattern,
3328 const uint64_t NonZeroBits,
3329 SDValue &Src, int &DstLSB,
3330 int &Width) {
3331 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3332
3333 EVT VT = Op.getValueType();
3334 assert((VT == MVT::i32 || VT == MVT::i64) &&
3335 "Caller guarantees that type is i32 or i64");
3336 (void)VT;
3337
3338 uint64_t ShlImm;
3339 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3340 return false;
3341
3342 if (!BiggerPattern && !Op.hasOneUse())
3343 return false;
3344
3345 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3346 return true;
3347
3348 DstLSB = llvm::countr_zero(NonZeroBits);
3349 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3350
3351 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3352 return false;
3353
3354 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3355 return true;
3356}
3357
3358static bool isShiftedMask(uint64_t Mask, EVT VT) {
3359 assert(VT == MVT::i32 || VT == MVT::i64);
3360 if (VT == MVT::i32)
3361 return isShiftedMask_32(Mask);
3362 return isShiftedMask_64(Mask);
3363}
3364
3365// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3366// inserted only sets known zero bits.
3367static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3368 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3369
3370 EVT VT = N->getValueType(0);
3371 if (VT != MVT::i32 && VT != MVT::i64)
3372 return false;
3373
3374 unsigned BitWidth = VT.getSizeInBits();
3375
3376 uint64_t OrImm;
3377 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3378 return false;
3379
3380 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3381 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3382 // performance neutral.
3383 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3384 return false;
3385
3386 uint64_t MaskImm;
3387 SDValue And = N->getOperand(0);
3388 // Must be a single use AND with an immediate operand.
3389 if (!And.hasOneUse() ||
3390 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3391 return false;
3392
3393 // Compute the Known Zero for the AND as this allows us to catch more general
3394 // cases than just looking for AND with imm.
3395 KnownBits Known = CurDAG->computeKnownBits(And);
3396
3397 // Non-zero in the sense that they're not provably zero, which is the key
3398 // point if we want to use this value.
3399 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3400
3401 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3402 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3403 return false;
3404
3405 // The bits being inserted must only set those bits that are known to be zero.
3406 if ((OrImm & NotKnownZero) != 0) {
3407 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3408 // currently handle this case.
3409 return false;
3410 }
3411
3412 // BFI/BFXIL dst, src, #lsb, #width.
3413 int LSB = llvm::countr_one(NotKnownZero);
3414 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3415
3416 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3417 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3418 unsigned ImmS = Width - 1;
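 // For example, with BitWidth == 64, LSB == 16 and Width == 8 this gives
 // ImmR == 48 and ImmS == 7, i.e. "BFI Xd, Xn, #16, #8" written as BFM.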
3419
3420 // If we're creating a BFI instruction avoid cases where we need more
3421 // instructions to materialize the BFI constant as compared to the original
3422 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3423 // should be no worse in this case.
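 // For example (on i64), OrImm == 0xffff0000 with LSB == 16 gives
 // BFIImm == 0xffff: one non-zero 16-bit chunk either way, so the BFI is kept.
 // With LSB == 8 instead, BFIImm == 0x00ffff00 would straddle two chunks and
 // the transformation is rejected below.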
3424 bool IsBFI = LSB != 0;
3425 uint64_t BFIImm = OrImm >> LSB;
3426 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3427 // We have a BFI instruction and we know the constant can't be materialized
3428 // with a ORR-immediate with the zero register.
3429 unsigned OrChunks = 0, BFIChunks = 0;
3430 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3431 if (((OrImm >> Shift) & 0xFFFF) != 0)
3432 ++OrChunks;
3433 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3434 ++BFIChunks;
3435 }
3436 if (BFIChunks > OrChunks)
3437 return false;
3438 }
3439
3440 // Materialize the constant to be inserted.
3441 SDLoc DL(N);
3442 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3443 SDNode *MOVI = CurDAG->getMachineNode(
3444 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3445
3446 // Create the BFI/BFXIL instruction.
3447 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3448 CurDAG->getTargetConstant(ImmR, DL, VT),
3449 CurDAG->getTargetConstant(ImmS, DL, VT)};
3450 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3451 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3452 return true;
3453}
3454
3455static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3456 SDValue &ShiftedOperand,
3457 uint64_t &EncodedShiftImm) {
3458 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3459 if (!Dst.hasOneUse())
3460 return false;
3461
3462 EVT VT = Dst.getValueType();
3463 assert((VT == MVT::i32 || VT == MVT::i64) &&
3464 "Caller should guarantee that VT is one of i32 or i64");
3465 const unsigned SizeInBits = VT.getSizeInBits();
3466
3467 SDLoc DL(Dst.getNode());
3468 uint64_t AndImm, ShlImm;
3469 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3470 isShiftedMask_64(AndImm)) {
3471 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3472 SDValue DstOp0 = Dst.getOperand(0);
3473 if (!DstOp0.hasOneUse())
3474 return false;
3475
3476 // An example to illustrate the transformation
3477 // From:
3478 // lsr x8, x1, #1
3479 // and x8, x8, #0x3f80
3480 // bfxil x8, x1, #0, #7
3481 // To:
3482 // and x8, x23, #0x7f
3483 // ubfx x9, x23, #8, #7
3484 // orr x23, x8, x9, lsl #7
3485 //
3486 // The number of instructions remains the same, but ORR is faster than BFXIL
3487 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3488 // the dependency chain is improved after the transformation.
3489 uint64_t SrlImm;
3490 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3491 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3492 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3493 unsigned MaskWidth =
3494 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3495 unsigned UBFMOpc =
3496 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3497 SDNode *UBFMNode = CurDAG->getMachineNode(
3498 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3499 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3500 VT),
3501 CurDAG->getTargetConstant(
3502 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3503 ShiftedOperand = SDValue(UBFMNode, 0);
3504 EncodedShiftImm = AArch64_AM::getShifterImm(
3505 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3506 return true;
3507 }
3508 }
3509 return false;
3510 }
3511
3512 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3513 ShiftedOperand = Dst.getOperand(0);
3514 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3515 return true;
3516 }
3517
3518 uint64_t SrlImm;
3519 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3520 ShiftedOperand = Dst.getOperand(0);
3521 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3522 return true;
3523 }
3524 return false;
3525}
3526
3527// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3528// the operands and select it to AArch64::ORR with shifted registers if
3529// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3530static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3531 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3532 const bool BiggerPattern) {
3533 EVT VT = N->getValueType(0);
3534 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3535 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3536 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3537 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3538 assert((VT == MVT::i32 || VT == MVT::i64) &&
3539 "Expect result type to be i32 or i64 since N is combinable to BFM");
3540 SDLoc DL(N);
3541
3542 // Bail out if BFM simplifies away one node in BFM Dst.
3543 if (OrOpd1 != Dst)
3544 return false;
3545
3546 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3547 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3548 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3549 if (BiggerPattern) {
3550 uint64_t SrcAndImm;
3551 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3552 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3553 // OrOpd0 = AND Src, #Mask
3554 // So BFM simplifies away one AND node from Src and doesn't simplify away
3555 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3556 // one node (from Rd), ORR is better since it has higher throughput and
3557 // smaller latency than BFM on many AArch64 processors (and for the rest
3558 // ORR is at least as good as BFM).
3559 SDValue ShiftedOperand;
3560 uint64_t EncodedShiftImm;
3561 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3562 EncodedShiftImm)) {
3563 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3564 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3565 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3566 return true;
3567 }
3568 }
3569 return false;
3570 }
3571
3572 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3573
3574 uint64_t ShlImm;
3575 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3576 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3577 SDValue Ops[] = {
3578 Dst, Src,
3579 CurDAG->getTargetConstant(
3580 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3581 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3582 return true;
3583 }
3584
3585 // Select the following pattern to left-shifted operand rather than BFI.
3586 // %val1 = op ..
3587 // %val2 = shl %val1, #imm
3588 // %res = or %val1, %val2
3589 //
3590 // If N is selected to be BFI, we know that
3591 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3592 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3593 //
3594 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3595 if (OrOpd0.getOperand(0) == OrOpd1) {
3596 SDValue Ops[] = {
3597 OrOpd1, OrOpd1,
3598 CurDAG->getTargetConstant(
3599 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3600 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3601 return true;
3602 }
3603 }
3604
3605 uint64_t SrlImm;
3606 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3607 // Select the following pattern to right-shifted operand rather than BFXIL.
3608 // %val1 = op ..
3609 // %val2 = lshr %val1, #imm
3610 // %res = or %val1, %val2
3611 //
3612 // If N is selected to be BFXIL, we know that
3613 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3614 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3615 //
3616 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3617 if (OrOpd0.getOperand(0) == OrOpd1) {
3618 SDValue Ops[] = {
3619 OrOpd1, OrOpd1,
3620 CurDAG->getTargetConstant(
3621 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3622 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3623 return true;
3624 }
3625 }
3626
3627 return false;
3628}
3629
3630static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3631 SelectionDAG *CurDAG) {
3632 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3633
3634 EVT VT = N->getValueType(0);
3635 if (VT != MVT::i32 && VT != MVT::i64)
3636 return false;
3637
3638 unsigned BitWidth = VT.getSizeInBits();
3639
3640 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3641 // have the expected shape. Try to undo that.
3642
3643 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3644 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3645
3646 // Given a OR operation, check if we have the following pattern
3647 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3648 // isBitfieldExtractOp)
3649 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3650 // countTrailingZeros(mask2) == imm2 - imm + 1
3651 // f = d | c
3652 // if yes, replace the OR instruction with:
3653 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3654
3655 // OR is commutative, check all combinations of operand order and values of
3656 // BiggerPattern, i.e.
3657 // Opd0, Opd1, BiggerPattern=false
3658 // Opd1, Opd0, BiggerPattern=false
3659 // Opd0, Opd1, BiggerPattern=true
3660 // Opd1, Opd0, BiggerPattern=true
3661 // Several of these combinations may match, so check with BiggerPattern=false
3662 // first since that will produce better results by matching more instructions
3663 // and/or inserting fewer extra instructions.
3664 for (int I = 0; I < 4; ++I) {
3665
3666 SDValue Dst, Src;
3667 unsigned ImmR, ImmS;
3668 bool BiggerPattern = I / 2;
3669 SDValue OrOpd0Val = N->getOperand(I % 2);
3670 SDNode *OrOpd0 = OrOpd0Val.getNode();
3671 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3672 SDNode *OrOpd1 = OrOpd1Val.getNode();
3673
3674 unsigned BFXOpc;
3675 int DstLSB, Width;
3676 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3677 NumberOfIgnoredLowBits, BiggerPattern)) {
3678 // Check that the returned opcode is compatible with the pattern,
3679 // i.e., same type and zero extended (U and not S)
3680 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3681 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3682 continue;
3683
3684 // Compute the width of the bitfield insertion
3685 DstLSB = 0;
3686 Width = ImmS - ImmR + 1;
3687 // FIXME: This constraint only catches bitfield insertion; we may
3688 // want to widen the pattern to handle the general bitfield
3689 // move case.
3690 if (Width <= 0)
3691 continue;
3692
3693 // If the mask on the insertee is correct, we have a BFXIL operation. We
3694 // can share the ImmR and ImmS values from the already-computed UBFM.
3695 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3696 BiggerPattern,
3697 Src, DstLSB, Width)) {
3698 ImmR = (BitWidth - DstLSB) % BitWidth;
3699 ImmS = Width - 1;
3700 } else
3701 continue;
3702
3703 // Check the second part of the pattern
3704 EVT VT = OrOpd1Val.getValueType();
3705 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3706
3707 // Compute the Known Zero for the candidate of the first operand.
3708 // This allows to catch more general case than just looking for
3709 // AND with imm. Indeed, simplify-demanded-bits may have removed
3710 // the AND instruction because it proves it was useless.
3711 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3712
3713 // Check if there is enough room for the second operand to appear
3714 // in the first one
3715 APInt BitsToBeInserted =
3716 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3717
3718 if ((BitsToBeInserted & ~Known.Zero) != 0)
3719 continue;
3720
3721 // Set the first operand
3722 uint64_t Imm;
3723 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3724 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3725 // In that case, we can eliminate the AND
3726 Dst = OrOpd1->getOperand(0);
3727 else
3728 // Maybe the AND has been removed by simplify-demanded-bits
3729 // or is useful because it discards more bits
3730 Dst = OrOpd1Val;
3731
3732 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3733 // with shifted operand is more efficient.
3734 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3735 BiggerPattern))
3736 return true;
3737
3738 // both parts match
3739 SDLoc DL(N);
3740 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3741 CurDAG->getTargetConstant(ImmS, DL, VT)};
3742 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3743 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3744 return true;
3745 }
3746
3747 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3748 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3749 // mask (e.g., 0x000ffff0).
3750 uint64_t Mask0Imm, Mask1Imm;
3751 SDValue And0 = N->getOperand(0);
3752 SDValue And1 = N->getOperand(1);
3753 if (And0.hasOneUse() && And1.hasOneUse() &&
3754 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3755 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3756 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3757 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3758
3759 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3760 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3761 // bits to be inserted.
3762 if (isShiftedMask(Mask0Imm, VT)) {
3763 std::swap(And0, And1);
3764 std::swap(Mask0Imm, Mask1Imm);
3765 }
3766
3767 SDValue Src = And1->getOperand(0);
3768 SDValue Dst = And0->getOperand(0);
3769 unsigned LSB = llvm::countr_zero(Mask1Imm);
3770 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3771
3772 // The BFXIL inserts the low-order bits from a source register, so right
3773 // shift the needed bits into place.
3774 SDLoc DL(N);
3775 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3776 uint64_t LsrImm = LSB;
3777 if (Src->hasOneUse() &&
3778 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3779 (LsrImm + LSB) < BitWidth) {
3780 Src = Src->getOperand(0);
3781 LsrImm += LSB;
3782 }
3783
3784 SDNode *LSR = CurDAG->getMachineNode(
3785 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3786 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3787
3788 // BFXIL is an alias of BFM, so translate to BFM operands.
3789 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3790 unsigned ImmS = Width - 1;
3791
3792 // Create the BFXIL instruction.
3793 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3794 CurDAG->getTargetConstant(ImmR, DL, VT),
3795 CurDAG->getTargetConstant(ImmS, DL, VT)};
3796 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3797 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3798 return true;
3799 }
3800
3801 return false;
3802}
3803
3804bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3805 if (N->getOpcode() != ISD::OR)
3806 return false;
3807
3808 APInt NUsefulBits;
3809 getUsefulBits(SDValue(N, 0), NUsefulBits);
3810
3811 // If all bits are not useful, just return UNDEF.
3812 if (!NUsefulBits) {
3813 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3814 return true;
3815 }
3816
3817 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3818 return true;
3819
3820 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3821}
3822
3823/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3824/// equivalent of a left shift by a constant amount followed by an and masking
3825/// out a contiguous set of bits.
3826bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3827 if (N->getOpcode() != ISD::AND)
3828 return false;
3829
3830 EVT VT = N->getValueType(0);
3831 if (VT != MVT::i32 && VT != MVT::i64)
3832 return false;
3833
3834 SDValue Op0;
3835 int DstLSB, Width;
3836 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3837 Op0, DstLSB, Width))
3838 return false;
3839
3840 // ImmR is the rotate right amount.
3841 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3842 // ImmS is the most significant bit of the source to be moved.
3843 unsigned ImmS = Width - 1;
3844
3845 SDLoc DL(N);
3846 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3847 CurDAG->getTargetConstant(ImmS, DL, VT)};
3848 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3849 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3850 return true;
3851}
3852
3853/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3854/// variable shift/rotate instructions.
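/// For example, (srl x, (add y, 64)) on i64 can be selected as "LSRV x, y":
/// LSRV only reads the low 6 bits of the shift amount, so adding a multiple of
/// 64 has no effect.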
3855bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3856 EVT VT = N->getValueType(0);
3857
3858 unsigned Opc;
3859 switch (N->getOpcode()) {
3860 case ISD::ROTR:
3861 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3862 break;
3863 case ISD::SHL:
3864 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3865 break;
3866 case ISD::SRL:
3867 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3868 break;
3869 case ISD::SRA:
3870 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3871 break;
3872 default:
3873 return false;
3874 }
3875
3876 uint64_t Size;
3877 uint64_t Bits;
3878 if (VT == MVT::i32) {
3879 Bits = 5;
3880 Size = 32;
3881 } else if (VT == MVT::i64) {
3882 Bits = 6;
3883 Size = 64;
3884 } else
3885 return false;
3886
3887 SDValue ShiftAmt = N->getOperand(1);
3888 SDLoc DL(N);
3889 SDValue NewShiftAmt;
3890
3891 // Skip over an extend of the shift amount.
3892 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3893 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3894 ShiftAmt = ShiftAmt->getOperand(0);
3895
3896 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3897 SDValue Add0 = ShiftAmt->getOperand(0);
3898 SDValue Add1 = ShiftAmt->getOperand(1);
3899 uint64_t Add0Imm;
3900 uint64_t Add1Imm;
3901 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3902 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3903 // to avoid the ADD/SUB.
3904 NewShiftAmt = Add0;
3905 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3906 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3907 (Add0Imm % Size == 0)) {
3908 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3909 // to generate a NEG instead of a SUB from a constant.
3910 unsigned NegOpc;
3911 unsigned ZeroReg;
3912 EVT SubVT = ShiftAmt->getValueType(0);
3913 if (SubVT == MVT::i32) {
3914 NegOpc = AArch64::SUBWrr;
3915 ZeroReg = AArch64::WZR;
3916 } else {
3917 assert(SubVT == MVT::i64);
3918 NegOpc = AArch64::SUBXrr;
3919 ZeroReg = AArch64::XZR;
3920 }
3921 SDValue Zero =
3922 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3923 MachineSDNode *Neg =
3924 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3925 NewShiftAmt = SDValue(Neg, 0);
3926 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3927 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3928 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3929 // to generate a NOT instead of a SUB from a constant.
3930 unsigned NotOpc;
3931 unsigned ZeroReg;
3932 EVT SubVT = ShiftAmt->getValueType(0);
3933 if (SubVT == MVT::i32) {
3934 NotOpc = AArch64::ORNWrr;
3935 ZeroReg = AArch64::WZR;
3936 } else {
3937 assert(SubVT == MVT::i64);
3938 NotOpc = AArch64::ORNXrr;
3939 ZeroReg = AArch64::XZR;
3940 }
3941 SDValue Zero =
3942 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3943 MachineSDNode *Not =
3944 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3945 NewShiftAmt = SDValue(Not, 0);
3946 } else
3947 return false;
3948 } else {
3949 // If the shift amount is masked with an AND, check that the mask covers the
3950 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3951 // the AND.
3952 uint64_t MaskImm;
3953 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3954 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3955 return false;
3956
3957 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3958 return false;
3959
3960 NewShiftAmt = ShiftAmt->getOperand(0);
3961 }
3962
3963 // Narrow/widen the shift amount to match the size of the shift operation.
3964 if (VT == MVT::i32)
3965 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3966 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3967 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3968 MachineSDNode *Ext = CurDAG->getMachineNode(
3969 AArch64::SUBREG_TO_REG, DL, VT,
3970 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3971 NewShiftAmt = SDValue(Ext, 0);
3972 }
3973
3974 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3975 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3976 return true;
3977}
3978
3979static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3980 SDValue &FixedPos,
3981 unsigned RegWidth,
3982 bool isReciprocal) {
3983 APFloat FVal(0.0);
3984 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3985 FVal = CN->getValueAPF();
3986 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3987 // Some otherwise illegal constants are allowed in this case.
3988 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3989 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3990 return false;
3991
3992 ConstantPoolSDNode *CN =
3993 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3994 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3995 } else
3996 return false;
3997
3998 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3999 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4000 // x-register.
4001 //
4002 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4003 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4004 // integers.
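 // For example, (fp_to_sint (fmul Val, 256.0)) targeting a w-register can be
 // selected as a fixed-point convert with fbits == 8, since 256.0 == 2^8.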
4005 bool IsExact;
4006
4007 if (isReciprocal)
4008 if (!FVal.getExactInverse(&FVal))
4009 return false;
4010
4011 // fbits is between 1 and 64 in the worst-case, which means the fmul
4012 // could have 2^64 as an actual operand. Need 65 bits of precision.
4013 APSInt IntVal(65, true);
4014 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4015
4016 // N.b. isPowerOf2 also checks for > 0.
4017 if (!IsExact || !IntVal.isPowerOf2())
4018 return false;
4019 unsigned FBits = IntVal.logBase2();
4020
4021 // Checks above should have guaranteed that we haven't lost information in
4022 // finding FBits, but it must still be in range.
4023 if (FBits == 0 || FBits > RegWidth) return false;
4024
4025 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4026 return true;
4027}
4028
4029bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4030 unsigned RegWidth) {
4031 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4032 false);
4033}
4034
4035bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4036 SDValue &FixedPos,
4037 unsigned RegWidth) {
4038 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4039 true);
4040}
4041
4042// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4043// fields of the string, obtains the integer values from them, and combines
4044// these into a single value to be used in the MRS/MSR instruction.
4045static int getIntOperandFromRegisterString(StringRef RegString) {
4046 SmallVector<StringRef, 5> Fields;
4047 RegString.split(Fields, ':');
4048
4049 if (Fields.size() == 1)
4050 return -1;
4051
4052 assert(Fields.size() == 5
4053 && "Invalid number of fields in read register string");
4054
4055 SmallVector<int, 5> Ops;
4056 bool AllIntFields = true;
4057
4058 for (StringRef Field : Fields) {
4059 unsigned IntField;
4060 AllIntFields &= !Field.getAsInteger(10, IntField);
4061 Ops.push_back(IntField);
4062 }
4063
4064 assert(AllIntFields &&
4065 "Unexpected non-integer value in special register string.");
4066 (void)AllIntFields;
4067
4068 // Need to combine the integer fields of the string into a single value
4069 // based on the bit encoding of MRS/MSR instruction.
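 // For example, a string "3:0:4:2:0" (o0=3, op1=0, CRn=4, CRm=2, op2=0) packs
 // to (3 << 14) | (0 << 11) | (4 << 7) | (2 << 3) | 0 == 0xc210.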
4070 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4071 (Ops[3] << 3) | (Ops[4]);
4072}
4073
4074// Lower the read_register intrinsic to an MRS instruction node if the special
4075// register string argument is either of the form detailed in the ACLE (the
4076// form described in getIntOperandFromRegisterString) or is a named register
4077// known by the MRS SysReg mapper.
4078bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4079 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4080 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4081 SDLoc DL(N);
4082
4083 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4084
4085 unsigned Opcode64Bit = AArch64::MRS;
4086 int Imm = getIntOperandFromRegisterString(RegString->getString());
4087 if (Imm == -1) {
4088 // No match. Use the sysreg mapper to map the remaining possible strings to
4089 // the value for the register to be used for the instruction operand.
4090 const auto *TheReg =
4091 AArch64SysReg::lookupSysRegByName(RegString->getString());
4092 if (TheReg && TheReg->Readable &&
4093 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4094 Imm = TheReg->Encoding;
4095 else
4096 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4097
4098 if (Imm == -1) {
4099 // Still no match, see if this is "pc" or give up.
4100 if (!ReadIs128Bit && RegString->getString() == "pc") {
4101 Opcode64Bit = AArch64::ADR;
4102 Imm = 0;
4103 } else {
4104 return false;
4105 }
4106 }
4107 }
4108
4109 SDValue InChain = N->getOperand(0);
4110 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4111 if (!ReadIs128Bit) {
4112 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4113 {SysRegImm, InChain});
4114 } else {
4115 SDNode *MRRS = CurDAG->getMachineNode(
4116 AArch64::MRRS, DL,
4117 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4118 {SysRegImm, InChain});
4119
4120 // Sysregs are not endian. The even register always contains the low half
4121 // of the register.
4122 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4123 SDValue(MRRS, 0));
4124 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4125 SDValue(MRRS, 0));
4126 SDValue OutChain = SDValue(MRRS, 1);
4127
4128 ReplaceUses(SDValue(N, 0), Lo);
4129 ReplaceUses(SDValue(N, 1), Hi);
4130 ReplaceUses(SDValue(N, 2), OutChain);
4131 };
4132 return true;
4133}
4134
4135// Lower the write_register intrinsic to an MSR instruction node if the special
4136// register string argument is either of the form detailed in the ACLE (the
4137// form described in getIntOperandFromRegisterString) or is a named register
4138// known by the MSR SysReg mapper.
4139bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4140 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4141 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4142 SDLoc DL(N);
4143
4144 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4145
4146 if (!WriteIs128Bit) {
4147 // Check if the register was one of those allowed as the pstatefield value
4148 // in the MSR (immediate) instruction. To accept the values allowed in the
4149 // pstatefield for the MSR (immediate) instruction, we also require that an
4150 // immediate value has been provided as an argument; we know that this is
4151 // the case, as it has been ensured by semantic checking.
4152 auto trySelectPState = [&](auto PMapper, unsigned State) {
4153 if (PMapper) {
4154 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4155 "Expected a constant integer expression.");
4156 unsigned Reg = PMapper->Encoding;
4157 uint64_t Immed = N->getConstantOperandVal(2);
4158 CurDAG->SelectNodeTo(
4159 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4160 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4161 return true;
4162 }
4163 return false;
4164 };
4165
4166 if (trySelectPState(
4167 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4168 AArch64::MSRpstateImm4))
4169 return true;
4170 if (trySelectPState(
4171 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4172 AArch64::MSRpstateImm1))
4173 return true;
4174 }
4175
4176 int Imm = getIntOperandFromRegisterString(RegString->getString());
4177 if (Imm == -1) {
4178 // Use the sysreg mapper to attempt to map the remaining possible strings
4179 // to the value for the register to be used for the MSR (register)
4180 // instruction operand.
4181 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4182 if (TheReg && TheReg->Writeable &&
4183 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4184 Imm = TheReg->Encoding;
4185 else
4186 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4187
4188 if (Imm == -1)
4189 return false;
4190 }
4191
4192 SDValue InChain = N->getOperand(0);
4193 if (!WriteIs128Bit) {
4194 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4195 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4196 N->getOperand(2), InChain);
4197 } else {
4198 // No endian swap. The lower half always goes into the even subreg, and the
4199 // higher half always into the odd subreg.
4200 SDNode *Pair = CurDAG->getMachineNode(
4201 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4202 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4203 MVT::i32),
4204 N->getOperand(2),
4205 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4206 N->getOperand(3),
4207 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4208
4209 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4210 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4211 SDValue(Pair, 0), InChain);
4212 }
4213
4214 return true;
4215}
4216
4217/// We've got special pseudo-instructions for these
4218bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4219 unsigned Opcode;
4220 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4221
4222 // Leave IR for LSE if subtarget supports it.
4223 if (Subtarget->hasLSE()) return false;
4224
4225 if (MemTy == MVT::i8)
4226 Opcode = AArch64::CMP_SWAP_8;
4227 else if (MemTy == MVT::i16)
4228 Opcode = AArch64::CMP_SWAP_16;
4229 else if (MemTy == MVT::i32)
4230 Opcode = AArch64::CMP_SWAP_32;
4231 else if (MemTy == MVT::i64)
4232 Opcode = AArch64::CMP_SWAP_64;
4233 else
4234 llvm_unreachable("Unknown AtomicCmpSwap type");
4235
4236 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4237 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4238 N->getOperand(0)};
4239 SDNode *CmpSwap = CurDAG->getMachineNode(
4240 Opcode, SDLoc(N),
4241 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4242
4243 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4244 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4245
4246 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4247 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4248 CurDAG->RemoveDeadNode(N);
4249
4250 return true;
4251}
4252
4253bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4254 SDValue &Shift, bool Negate) {
4255 if (!isa<ConstantSDNode>(N))
4256 return false;
4257
4258 SDLoc DL(N);
4259 APInt Val =
4260 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4261
4262 if (Negate)
4263 Val = -Val;
4264
4265 switch (VT.SimpleTy) {
4266 case MVT::i8:
4267 // All immediates are supported.
4268 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4269 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4270 return true;
4271 case MVT::i16:
4272 case MVT::i32:
4273 case MVT::i64:
4274 // Support 8bit unsigned immediates.
4275 if ((Val & ~0xff) == 0) {
4276 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4277 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4278 return true;
4279 }
4280 // Support 16bit unsigned immediates that are a multiple of 256.
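 // For example, Val == 0x1200 is encoded as Imm == 0x12 with Shift == 8.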
4281 if ((Val & ~0xff00) == 0) {
4282 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4283 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4284 return true;
4285 }
4286 break;
4287 default:
4288 break;
4289 }
4290
4291 return false;
4292}
4293
4294bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4295 SDValue &Imm, SDValue &Shift,
4296 bool Negate) {
4297 if (!isa<ConstantSDNode>(N))
4298 return false;
4299
4300 SDLoc DL(N);
4301 int64_t Val = cast<ConstantSDNode>(N)
4302 ->getAPIntValue()
4303 .trunc(VT.getFixedSizeInBits())
4304 .getSExtValue();
4305
4306 if (Negate)
4307 Val = -Val;
4308
4309 // Signed saturating instructions treat their immediate operand as unsigned,
4310 // whereas the related intrinsics define their operands to be signed. This
4311 // means we can only use the immediate form when the operand is non-negative.
4312 if (Val < 0)
4313 return false;
4314
4315 switch (VT.SimpleTy) {
4316 case MVT::i8:
4317 // All positive immediates are supported.
4318 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4319 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4320 return true;
4321 case MVT::i16:
4322 case MVT::i32:
4323 case MVT::i64:
4324 // Support 8bit positive immediates.
4325 if (Val <= 255) {
4326 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4327 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4328 return true;
4329 }
4330 // Support 16bit positive immediates that are a multiple of 256.
4331 if (Val <= 65280 && Val % 256 == 0) {
4332 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4333 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4334 return true;
4335 }
4336 break;
4337 default:
4338 break;
4339 }
4340
4341 return false;
4342}
4343
4344bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4345 SDValue &Shift) {
4346 if (!isa<ConstantSDNode>(N))
4347 return false;
4348
4349 SDLoc DL(N);
4350 int64_t Val = cast<ConstantSDNode>(N)
4351 ->getAPIntValue()
4352 .trunc(VT.getFixedSizeInBits())
4353 .getSExtValue();
4354 int32_t ImmVal, ShiftVal;
4355 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4356 ShiftVal))
4357 return false;
4358
4359 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4360 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4361 return true;
4362}
4363
4364bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4365 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4366 int64_t ImmVal = CNode->getSExtValue();
4367 SDLoc DL(N);
4368 if (ImmVal >= -128 && ImmVal < 128) {
4369 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4370 return true;
4371 }
4372 }
4373 return false;
4374}
4375
4376bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4377 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4378 uint64_t ImmVal = CNode->getZExtValue();
4379
4380 switch (VT.SimpleTy) {
4381 case MVT::i8:
4382 ImmVal &= 0xFF;
4383 break;
4384 case MVT::i16:
4385 ImmVal &= 0xFFFF;
4386 break;
4387 case MVT::i32:
4388 ImmVal &= 0xFFFFFFFF;
4389 break;
4390 case MVT::i64:
4391 break;
4392 default:
4393 llvm_unreachable("Unexpected type");
4394 }
4395
4396 if (ImmVal < 256) {
4397 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4398 return true;
4399 }
4400 }
4401 return false;
4402}
4403
4404bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4405 bool Invert) {
4406 uint64_t ImmVal;
4407 if (auto CI = dyn_cast<ConstantSDNode>(N))
4408 ImmVal = CI->getZExtValue();
4409 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4410 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4411 else
4412 return false;
4413
4414 if (Invert)
4415 ImmVal = ~ImmVal;
4416
4417 // Shift mask depending on type size.
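 // For example, an i16 immediate of 0x00f0 is replicated to
 // 0x00f000f000f000f0 before the 64-bit logical-immediate encoding is computed.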
4418 switch (VT.SimpleTy) {
4419 case MVT::i8:
4420 ImmVal &= 0xFF;
4421 ImmVal |= ImmVal << 8;
4422 ImmVal |= ImmVal << 16;
4423 ImmVal |= ImmVal << 32;
4424 break;
4425 case MVT::i16:
4426 ImmVal &= 0xFFFF;
4427 ImmVal |= ImmVal << 16;
4428 ImmVal |= ImmVal << 32;
4429 break;
4430 case MVT::i32:
4431 ImmVal &= 0xFFFFFFFF;
4432 ImmVal |= ImmVal << 32;
4433 break;
4434 case MVT::i64:
4435 break;
4436 default:
4437 llvm_unreachable("Unexpected type");
4438 }
4439
4440 uint64_t encoding;
4441 if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
4442 return false;
4443
4444 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4445 return true;
4446}
4447
4448// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4449// Rather than attempt to normalise everything we can sometimes saturate the
4450// shift amount during selection. This function also allows for consistent
4451// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4452// required by the instructions.
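// For example, with Low == 1, High == 64 and AllowSaturation == true, a shift
// amount of 70 is clamped to 64, while 0 is rejected as too small.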
4453bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4454 uint64_t High, bool AllowSaturation,
4455 SDValue &Imm) {
4456 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4457 uint64_t ImmVal = CN->getZExtValue();
4458
4459 // Reject shift amounts that are too small.
4460 if (ImmVal < Low)
4461 return false;
4462
4463 // Reject or saturate shift amounts that are too big.
4464 if (ImmVal > High) {
4465 if (!AllowSaturation)
4466 return false;
4467 ImmVal = High;
4468 }
4469
4470 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4471 return true;
4472 }
4473
4474 return false;
4475}
4476
4477bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4478 // tagp(FrameIndex, IRGstack, tag_offset):
4479 // since the offset between FrameIndex and IRGstack is a compile-time
4480 // constant, this can be lowered to a single ADDG instruction.
4481 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4482 return false;
4483 }
4484
4485 SDValue IRG_SP = N->getOperand(2);
4486 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4487 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4488 return false;
4489 }
4490
4491 const TargetLowering *TLI = getTargetLowering();
4492 SDLoc DL(N);
4493 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4494 SDValue FiOp = CurDAG->getTargetFrameIndex(
4495 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4496 int TagOffset = N->getConstantOperandVal(3);
4497
4498 SDNode *Out = CurDAG->getMachineNode(
4499 AArch64::TAGPstack, DL, MVT::i64,
4500 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4501 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4502 ReplaceNode(N, Out);
4503 return true;
4504}
4505
4506void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4507 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4508 "llvm.aarch64.tagp third argument must be an immediate");
4509 if (trySelectStackSlotTagP(N))
4510 return;
4511 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4512 // compile-time constant, not just for stack allocations.
4513
4514 // General case for unrelated pointers in Op1 and Op2.
4515 SDLoc DL(N);
4516 int TagOffset = N->getConstantOperandVal(3);
4517 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4518 {N->getOperand(1), N->getOperand(2)});
4519 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4520 {SDValue(N1, 0), N->getOperand(2)});
4521 SDNode *N3 = CurDAG->getMachineNode(
4522 AArch64::ADDG, DL, MVT::i64,
4523 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4524 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4525 ReplaceNode(N, N3);
4526}
4527
4528bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4529 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4530
4531 // Bail when not a "cast" like insert_subvector.
4532 if (N->getConstantOperandVal(2) != 0)
4533 return false;
4534 if (!N->getOperand(0).isUndef())
4535 return false;
4536
4537 // Bail when normal isel should do the job.
4538 EVT VT = N->getValueType(0);
4539 EVT InVT = N->getOperand(1).getValueType();
4540 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4541 return false;
4542 if (InVT.getSizeInBits() <= 128)
4543 return false;
4544
4545 // NOTE: We can only get here when doing fixed length SVE code generation.
4546 // We do manual selection because the types involved are not linked to real
4547 // registers (despite being legal) and must be coerced into SVE registers.
4548
4550 "Expected to insert into a packed scalable vector!");
4551
4552 SDLoc DL(N);
4553 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4554 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4555 N->getOperand(1), RC));
4556 return true;
4557}
4558
4559bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4560 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4561
4562 // Bail when not a "cast" like extract_subvector.
4563 if (N->getConstantOperandVal(1) != 0)
4564 return false;
4565
4566 // Bail when normal isel can do the job.
4567 EVT VT = N->getValueType(0);
4568 EVT InVT = N->getOperand(0).getValueType();
4569 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4570 return false;
4571 if (VT.getSizeInBits() <= 128)
4572 return false;
4573
4574 // NOTE: We can only get here when doing fixed length SVE code generation.
4575 // We do manual selection because the types involved are not linked to real
4576 // registers (despite being legal) and must be coerced into SVE registers.
4577
4579 "Expected to extract from a packed scalable vector!");
4580
4581 SDLoc DL(N);
4582 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4583 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4584 N->getOperand(0), RC));
4585 return true;
4586}
4587
4588bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4589 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4590
4591 SDValue N0 = N->getOperand(0);
4592 SDValue N1 = N->getOperand(1);
4593
4594 EVT VT = N->getValueType(0);
4595 SDLoc DL(N);
4596
4597 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4598 // Rotate by a constant is a funnel shift in IR, which is expanded to
4599 // an OR with shifted operands.
4600 // We do the following transform:
4601 // OR N0, N1 -> xar (x, y, imm)
4602 // Where:
4603 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4604 // N0 = SHL_PRED true, V, splat(bits-imm)
4605 // V = (xor x, y)
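 // For example, on nxv2i64 a rotate right by 8 of (xor x, y) appears as
 // N0 == SHL_PRED ptrue, (xor x, y), splat(56) and
 // N1 == SRL_PRED ptrue, (xor x, y), splat(8), and is selected here as a XAR
 // with immediate 8 (the immediate is the right-rotate amount).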
4606 if (VT.isScalableVector() &&
4607 (Subtarget->hasSVE2() ||
4608 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4609 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4610 N1.getOpcode() != AArch64ISD::SRL_PRED)
4611 std::swap(N0, N1);
4612 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4613 N1.getOpcode() != AArch64ISD::SRL_PRED)
4614 return false;
4615
4616 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4617 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4618 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4619 return false;
4620
4621 if (N0.getOperand(1) != N1.getOperand(1))
4622 return false;
4623
4624 SDValue R1, R2;
4625 bool IsXOROperand = true;
4626 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4627 IsXOROperand = false;
4628 } else {
4629 R1 = N0.getOperand(1).getOperand(0);
4630 R2 = N1.getOperand(1).getOperand(1);
4631 }
4632
4633 APInt ShlAmt, ShrAmt;
4634 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4635 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4636 return false;
4637
4638 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4639 return false;
4640
4641 if (!IsXOROperand) {
4642 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4643 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4644 SDValue MOVIV = SDValue(MOV, 0);
4645
4646 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4647 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4648 VT, Zero, MOVIV, ZSub);
4649
4650 R1 = N1->getOperand(1);
4651 R2 = SDValue(SubRegToReg, 0);
4652 }
4653
4654 SDValue Imm =
4655 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4656
4657 SDValue Ops[] = {R1, R2, Imm};
4658 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4659 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4660 AArch64::XAR_ZZZI_D})) {
4661 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4662 return true;
4663 }
4664 return false;
4665 }
4666
4667 // We have Neon SHA3 XAR operation for v2i64 but for types
4668 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4669 // is available.
4670 EVT SVT;
4671 switch (VT.getSimpleVT().SimpleTy) {
4672 case MVT::v4i32:
4673 case MVT::v2i32:
4674 SVT = MVT::nxv4i32;
4675 break;
4676 case MVT::v8i16:
4677 case MVT::v4i16:
4678 SVT = MVT::nxv8i16;
4679 break;
4680 case MVT::v16i8:
4681 case MVT::v8i8:
4682 SVT = MVT::nxv16i8;
4683 break;
4684 case MVT::v2i64:
4685 case MVT::v1i64:
4686 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4687 break;
4688 default:
4689 return false;
4690 }
4691
4692 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4693 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4694 return false;
4695
4696 if (N0->getOpcode() != AArch64ISD::VSHL ||
4697 N1->getOpcode() != AArch64ISD::VLSHR)
4698 return false;
4699
4700 if (N0->getOperand(0) != N1->getOperand(0))
4701 return false;
4702
4703 SDValue R1, R2;
4704 bool IsXOROperand = true;
4705 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4706 IsXOROperand = false;
4707 } else {
4708 SDValue XOR = N0.getOperand(0);
4709 R1 = XOR.getOperand(0);
4710 R2 = XOR.getOperand(1);
4711 }
4712
4713 unsigned HsAmt = N0.getConstantOperandVal(1);
4714 unsigned ShAmt = N1.getConstantOperandVal(1);
4715
4716 SDValue Imm = CurDAG->getTargetConstant(
4717 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4718
4719 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4720 if (ShAmt + HsAmt != VTSizeInBits)
4721 return false;
4722
4723 if (!IsXOROperand) {
4724 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4725 SDNode *MOV =
4726 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4727 SDValue MOVIV = SDValue(MOV, 0);
4728
4729 R1 = N1->getOperand(0);
4730 R2 = MOVIV;
4731 }
4732
4733 if (SVT != VT) {
4734 SDValue Undef =
4735 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4736
4737 if (SVT.isScalableVector() && VT.is64BitVector()) {
4738 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4739
4740 SDValue UndefQ = SDValue(
4741 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4742 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4743
4744 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4745 UndefQ, R1, DSub),
4746 0);
4747 if (R2.getValueType() == VT)
4748 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4749 UndefQ, R2, DSub),
4750 0);
4751 }
4752
4753 SDValue SubReg = CurDAG->getTargetConstant(
4754 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4755
4756 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4757 R1, SubReg),
4758 0);
4759
4760 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4761 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4762 Undef, R2, SubReg),
4763 0);
4764 }
4765
4766 SDValue Ops[] = {R1, R2, Imm};
4767 SDNode *XAR = nullptr;
4768
4769 if (SVT.isScalableVector()) {
4770 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4771 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4772 AArch64::XAR_ZZZI_D}))
4773 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4774 } else {
4775 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4776 }
4777
4778 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4779
4780 if (SVT != VT) {
4781 if (VT.is64BitVector() && SVT.isScalableVector()) {
4782 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4783
4784 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4785 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4786 SDValue(XAR, 0), ZSub);
4787
4788 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4789 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4790 SDValue(Q, 0), DSub);
4791 } else {
4792 SDValue SubReg = CurDAG->getTargetConstant(
4793 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4794 MVT::i32);
4795 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4796 SDValue(XAR, 0), SubReg);
4797 }
4798 }
4799 ReplaceNode(N, XAR);
4800 return true;
4801}
4802
4803void AArch64DAGToDAGISel::Select(SDNode *Node) {
4804 // If we have a custom node, we already have selected!
4805 if (Node->isMachineOpcode()) {
4806 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4807 Node->setNodeId(-1);
4808 return;
4809 }
4810
4811 // Few custom selection stuff.
4812 EVT VT = Node->getValueType(0);
4813
4814 switch (Node->getOpcode()) {
4815 default:
4816 break;
4817
4818 case ISD::ATOMIC_CMP_SWAP:
4819 if (SelectCMP_SWAP(Node))
4820 return;
4821 break;
4822
4823 case ISD::READ_REGISTER:
4824 case AArch64ISD::MRRS:
4825 if (tryReadRegister(Node))
4826 return;
4827 break;
4828
4829 case ISD::WRITE_REGISTER:
4830 case AArch64ISD::MSRR:
4831 if (tryWriteRegister(Node))
4832 return;
4833 break;
4834
4835 case ISD::LOAD: {
4836 // Try to select as an indexed load. Fall through to normal processing
4837 // if we can't.
4838 if (tryIndexedLoad(Node))
4839 return;
4840 break;
4841 }
4842
4843 case ISD::SRL:
4844 case ISD::AND:
4845 case ISD::SRA:
4846 case ISD::SIGN_EXTEND_INREG:
4847 if (tryBitfieldExtractOp(Node))
4848 return;
4849 if (tryBitfieldInsertInZeroOp(Node))
4850 return;
4851 [[fallthrough]];
4852 case ISD::ROTR:
4853 case ISD::SHL:
4854 if (tryShiftAmountMod(Node))
4855 return;
4856 break;
4857
4858 case ISD::SIGN_EXTEND:
4859 if (tryBitfieldExtractOpFromSExt(Node))
4860 return;
4861 break;
4862
4863 case ISD::OR:
4864 if (tryBitfieldInsertOp(Node))
4865 return;
4866 if (trySelectXAR(Node))
4867 return;
4868 break;
4869
4870 case ISD::EXTRACT_SUBVECTOR: {
4871 if (trySelectCastScalableToFixedLengthVector(Node))
4872 return;
4873 break;
4874 }
4875
4876 case ISD::INSERT_SUBVECTOR: {
4877 if (trySelectCastFixedLengthToScalableVector(Node))
4878 return;
4879 break;
4880 }
4881
4882 case ISD::Constant: {
4883 // Materialize zero constants as copies from WZR/XZR. This allows
4884 // the coalescer to propagate these into other instructions.
4885 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4886 if (ConstNode->isZero()) {
4887 if (VT == MVT::i32) {
4888 SDValue New = CurDAG->getCopyFromReg(
4889 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4890 ReplaceNode(Node, New.getNode());
4891 return;
4892 } else if (VT == MVT::i64) {
4893 SDValue New = CurDAG->getCopyFromReg(
4894 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4895 ReplaceNode(Node, New.getNode());
4896 return;
4897 }
4898 }
4899 break;
4900 }
4901
4902 case ISD::FrameIndex: {
4903 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
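// In other words, once frame indices are eliminated the slot address is formed
// directly as "add xN, sp, #offset".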
4904 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4905 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4906 const TargetLowering *TLI = getTargetLowering();
4907 SDValue TFI = CurDAG->getTargetFrameIndex(
4908 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4909 SDLoc DL(Node);
4910 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4911 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4912 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4913 return;
4914 }
4915 case ISD::INTRINSIC_W_CHAIN: {
4916 unsigned IntNo = Node->getConstantOperandVal(1);
4917 switch (IntNo) {
4918 default:
4919 break;
4920 case Intrinsic::aarch64_gcsss: {
4921 SDLoc DL(Node);
4922 SDValue Chain = Node->getOperand(0);
4923 SDValue Val = Node->getOperand(2);
4924 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4925 SDNode *SS1 =
4926 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4927 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4928 MVT::Other, Zero, SDValue(SS1, 0));
4929 ReplaceNode(Node, SS2);
4930 return;
4931 }
4932 case Intrinsic::aarch64_ldaxp:
4933 case Intrinsic::aarch64_ldxp: {
4934 unsigned Op =
4935 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4936 SDValue MemAddr = Node->getOperand(2);
4937 SDLoc DL(Node);
4938 SDValue Chain = Node->getOperand(0);
4939
4940 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4941 MVT::Other, MemAddr, Chain);
4942
4943 // Transfer memoperands.
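// Keeping the original MachineMemOperand on the selected node preserves the
// access size, alignment and atomic-ordering information of the intrinsic.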
4944 MachineMemOperand *MemOp =
4945 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4946 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4947 ReplaceNode(Node, Ld);
4948 return;
4949 }
4950 case Intrinsic::aarch64_stlxp:
4951 case Intrinsic::aarch64_stxp: {
4952 unsigned Op =
4953 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4954 SDLoc DL(Node);
4955 SDValue Chain = Node->getOperand(0);
4956 SDValue ValLo = Node->getOperand(2);
4957 SDValue ValHi = Node->getOperand(3);
4958 SDValue MemAddr = Node->getOperand(4);
4959
4960 // Place arguments in the right order.
4961 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4962
4963 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4964 // Transfer memoperands.
4965 MachineMemOperand *MemOp =
4966 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4967 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4968
4969 ReplaceNode(Node, St);
4970 return;
4971 }
4972 case Intrinsic::aarch64_neon_ld1x2:
4973 if (VT == MVT::v8i8) {
4974 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4975 return;
4976 } else if (VT == MVT::v16i8) {
4977 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4978 return;
4979 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4980 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4981 return;
4982 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4983 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4984 return;
4985 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4986 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4987 return;
4988 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4989 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4990 return;
4991 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4992 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4993 return;
4994 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4995 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4996 return;
4997 }
4998 break;
4999 case Intrinsic::aarch64_neon_ld1x3:
5000 if (VT == MVT::v8i8) {
5001 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5002 return;
5003 } else if (VT == MVT::v16i8) {
5004 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5005 return;
5006 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5007 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5008 return;
5009 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5010 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5011 return;
5012 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5013 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5014 return;
5015 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5016 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5017 return;
5018 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5019 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5020 return;
5021 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5022 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5023 return;
5024 }
5025 break;
5026 case Intrinsic::aarch64_neon_ld1x4:
5027 if (VT == MVT::v8i8) {
5028 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5029 return;
5030 } else if (VT == MVT::v16i8) {
5031 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5032 return;
5033 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5034 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5035 return;
5036 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5037 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5038 return;
5039 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5040 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5041 return;
5042 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5043 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5044 return;
5045 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5046 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5047 return;
5048 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5049 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5050 return;
5051 }
5052 break;
5053 case Intrinsic::aarch64_neon_ld2:
5054 if (VT == MVT::v8i8) {
5055 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5056 return;
5057 } else if (VT == MVT::v16i8) {
5058 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5059 return;
5060 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5061 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5062 return;
5063 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5064 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5065 return;
5066 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5067 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5068 return;
5069 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5070 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5071 return;
5072 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5073 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5074 return;
5075 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5076 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5077 return;
5078 }
5079 break;
5080 case Intrinsic::aarch64_neon_ld3:
5081 if (VT == MVT::v8i8) {
5082 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5083 return;
5084 } else if (VT == MVT::v16i8) {
5085 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5086 return;
5087 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5088 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5089 return;
5090 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5091 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5092 return;
5093 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5094 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5095 return;
5096 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5097 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5098 return;
5099 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5100 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5101 return;
5102 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5103 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5104 return;
5105 }
5106 break;
5107 case Intrinsic::aarch64_neon_ld4:
5108 if (VT == MVT::v8i8) {
5109 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5110 return;
5111 } else if (VT == MVT::v16i8) {
5112 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5113 return;
5114 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5115 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5116 return;
5117 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5118 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5119 return;
5120 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5121 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5122 return;
5123 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5124 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5125 return;
5126 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5127 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5128 return;
5129 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5130 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5131 return;
5132 }
5133 break;
5134 case Intrinsic::aarch64_neon_ld2r:
5135 if (VT == MVT::v8i8) {
5136 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5137 return;
5138 } else if (VT == MVT::v16i8) {
5139 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5140 return;
5141 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5142 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5143 return;
5144 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5145 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5146 return;
5147 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5148 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5149 return;
5150 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5151 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5152 return;
5153 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5154 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5155 return;
5156 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5157 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5158 return;
5159 }
5160 break;
5161 case Intrinsic::aarch64_neon_ld3r:
5162 if (VT == MVT::v8i8) {
5163 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5164 return;
5165 } else if (VT == MVT::v16i8) {
5166 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5167 return;
5168 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5169 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5170 return;
5171 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5172 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5173 return;
5174 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5175 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5178 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5179 return;
5180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5181 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5182 return;
5183 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5184 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5185 return;
5186 }
5187 break;
5188 case Intrinsic::aarch64_neon_ld4r:
5189 if (VT == MVT::v8i8) {
5190 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5191 return;
5192 } else if (VT == MVT::v16i8) {
5193 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5194 return;
5195 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5196 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5197 return;
5198 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5199 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5200 return;
5201 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5202 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5203 return;
5204 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5205 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5206 return;
5207 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5208 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5209 return;
5210 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5211 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5212 return;
5213 }
5214 break;
5215 case Intrinsic::aarch64_neon_ld2lane:
5216 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5217 SelectLoadLane(Node, 2, AArch64::LD2i8);
5218 return;
5219 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5220 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5221 SelectLoadLane(Node, 2, AArch64::LD2i16);
5222 return;
5223 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5224 VT == MVT::v2f32) {
5225 SelectLoadLane(Node, 2, AArch64::LD2i32);
5226 return;
5227 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5228 VT == MVT::v1f64) {
5229 SelectLoadLane(Node, 2, AArch64::LD2i64);
5230 return;
5231 }
5232 break;
5233 case Intrinsic::aarch64_neon_ld3lane:
5234 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5235 SelectLoadLane(Node, 3, AArch64::LD3i8);
5236 return;
5237 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5238 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5239 SelectLoadLane(Node, 3, AArch64::LD3i16);
5240 return;
5241 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5242 VT == MVT::v2f32) {
5243 SelectLoadLane(Node, 3, AArch64::LD3i32);
5244 return;
5245 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5246 VT == MVT::v1f64) {
5247 SelectLoadLane(Node, 3, AArch64::LD3i64);
5248 return;
5249 }
5250 break;
5251 case Intrinsic::aarch64_neon_ld4lane:
5252 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5253 SelectLoadLane(Node, 4, AArch64::LD4i8);
5254 return;
5255 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5256 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5257 SelectLoadLane(Node, 4, AArch64::LD4i16);
5258 return;
5259 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5260 VT == MVT::v2f32) {
5261 SelectLoadLane(Node, 4, AArch64::LD4i32);
5262 return;
5263 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5264 VT == MVT::v1f64) {
5265 SelectLoadLane(Node, 4, AArch64::LD4i64);
5266 return;
5267 }
5268 break;
5269 case Intrinsic::aarch64_ld64b:
5270 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5271 return;
5272 case Intrinsic::aarch64_sve_ld2q_sret: {
5273 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5274 return;
5275 }
5276 case Intrinsic::aarch64_sve_ld3q_sret: {
5277 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5278 return;
5279 }
5280 case Intrinsic::aarch64_sve_ld4q_sret: {
5281 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5282 return;
5283 }
5284 case Intrinsic::aarch64_sve_ld2_sret: {
5285 if (VT == MVT::nxv16i8) {
5286 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5287 true);
5288 return;
5289 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5290 VT == MVT::nxv8bf16) {
5291 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5292 true);
5293 return;
5294 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5295 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5296 true);
5297 return;
5298 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5299 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5300 true);
5301 return;
5302 }
5303 break;
5304 }
5305 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5306 if (VT == MVT::nxv16i8) {
5307 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5308 SelectContiguousMultiVectorLoad(
5309 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5310 else if (Subtarget->hasSVE2p1())
5311 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5312 AArch64::LD1B_2Z);
5313 else
5314 break;
5315 return;
5316 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5317 VT == MVT::nxv8bf16) {
5318 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5319 SelectContiguousMultiVectorLoad(
5320 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5321 else if (Subtarget->hasSVE2p1())
5322 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5323 AArch64::LD1H_2Z);
5324 else
5325 break;
5326 return;
5327 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5328 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5329 SelectContiguousMultiVectorLoad(
5330 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5331 else if (Subtarget->hasSVE2p1())
5332 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5333 AArch64::LD1W_2Z);
5334 else
5335 break;
5336 return;
5337 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5338 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5339 SelectContiguousMultiVectorLoad(
5340 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5341 else if (Subtarget->hasSVE2p1())
5342 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5343 AArch64::LD1D_2Z);
5344 else
5345 break;
5346 return;
5347 }
5348 break;
5349 }
5350 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5351 if (VT == MVT::nxv16i8) {
5352 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5353 SelectContiguousMultiVectorLoad(
5354 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5355 else if (Subtarget->hasSVE2p1())
5356 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5357 AArch64::LD1B_4Z);
5358 else
5359 break;
5360 return;
5361 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5362 VT == MVT::nxv8bf16) {
5363 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5364 SelectContiguousMultiVectorLoad(
5365 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5366 else if (Subtarget->hasSVE2p1())
5367 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5368 AArch64::LD1H_4Z);
5369 else
5370 break;
5371 return;
5372 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5373 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5374 SelectContiguousMultiVectorLoad(
5375 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5376 else if (Subtarget->hasSVE2p1())
5377 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5378 AArch64::LD1W_4Z);
5379 else
5380 break;
5381 return;
5382 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5383 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5384 SelectContiguousMultiVectorLoad(
5385 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5386 else if (Subtarget->hasSVE2p1())
5387 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5388 AArch64::LD1D_4Z);
5389 else
5390 break;
5391 return;
5392 }
5393 break;
5394 }
5395 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5396 if (VT == MVT::nxv16i8) {
5397 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5398 SelectContiguousMultiVectorLoad(Node, 2, 0,
5399 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5400 AArch64::LDNT1B_2Z_PSEUDO);
5401 else if (Subtarget->hasSVE2p1())
5402 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5403 AArch64::LDNT1B_2Z);
5404 else
5405 break;
5406 return;
5407 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5408 VT == MVT::nxv8bf16) {
5409 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5410 SelectContiguousMultiVectorLoad(Node, 2, 1,
5411 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5412 AArch64::LDNT1H_2Z_PSEUDO);
5413 else if (Subtarget->hasSVE2p1())
5414 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5415 AArch64::LDNT1H_2Z);
5416 else
5417 break;
5418 return;
5419 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5420 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5421 SelectContiguousMultiVectorLoad(Node, 2, 2,
5422 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5423 AArch64::LDNT1W_2Z_PSEUDO);
5424 else if (Subtarget->hasSVE2p1())
5425 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5426 AArch64::LDNT1W_2Z);
5427 else
5428 break;
5429 return;
5430 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5431 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5432 SelectContiguousMultiVectorLoad(Node, 2, 3,
5433 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5434 AArch64::LDNT1D_2Z_PSEUDO);
5435 else if (Subtarget->hasSVE2p1())
5436 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5437 AArch64::LDNT1D_2Z);
5438 else
5439 break;
5440 return;
5441 }
5442 break;
5443 }
5444 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5445 if (VT == MVT::nxv16i8) {
5446 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5447 SelectContiguousMultiVectorLoad(Node, 4, 0,
5448 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5449 AArch64::LDNT1B_4Z_PSEUDO);
5450 else if (Subtarget->hasSVE2p1())
5451 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5452 AArch64::LDNT1B_4Z);
5453 else
5454 break;
5455 return;
5456 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5457 VT == MVT::nxv8bf16) {
5458 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5459 SelectContiguousMultiVectorLoad(Node, 4, 1,
5460 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5461 AArch64::LDNT1H_4Z_PSEUDO);
5462 else if (Subtarget->hasSVE2p1())
5463 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5464 AArch64::LDNT1H_4Z);
5465 else
5466 break;
5467 return;
5468 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5469 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5470 SelectContiguousMultiVectorLoad(Node, 4, 2,
5471 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5472 AArch64::LDNT1W_4Z_PSEUDO);
5473 else if (Subtarget->hasSVE2p1())
5474 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5475 AArch64::LDNT1W_4Z);
5476 else
5477 break;
5478 return;
5479 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5480 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5481 SelectContiguousMultiVectorLoad(Node, 4, 3,
5482 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5483 AArch64::LDNT1D_4Z_PSEUDO);
5484 else if (Subtarget->hasSVE2p1())
5485 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5486 AArch64::LDNT1D_4Z);
5487 else
5488 break;
5489 return;
5490 }
5491 break;
5492 }
5493 case Intrinsic::aarch64_sve_ld3_sret: {
5494 if (VT == MVT::nxv16i8) {
5495 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5496 true);
5497 return;
5498 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5499 VT == MVT::nxv8bf16) {
5500 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5501 true);
5502 return;
5503 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5504 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5505 true);
5506 return;
5507 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5508 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5509 true);
5510 return;
5511 }
5512 break;
5513 }
5514 case Intrinsic::aarch64_sve_ld4_sret: {
5515 if (VT == MVT::nxv16i8) {
5516 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5517 true);
5518 return;
5519 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5520 VT == MVT::nxv8bf16) {
5521 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5522 true);
5523 return;
5524 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5525 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5526 true);
5527 return;
5528 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5529 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5530 true);
5531 return;
5532 }
5533 break;
5534 }
5535 case Intrinsic::aarch64_sme_read_hor_vg2: {
5536 if (VT == MVT::nxv16i8) {
5537 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5538 AArch64::MOVA_2ZMXI_H_B);
5539 return;
5540 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5541 VT == MVT::nxv8bf16) {
5542 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5543 AArch64::MOVA_2ZMXI_H_H);
5544 return;
5545 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5546 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5547 AArch64::MOVA_2ZMXI_H_S);
5548 return;
5549 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5550 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5551 AArch64::MOVA_2ZMXI_H_D);
5552 return;
5553 }
5554 break;
5555 }
5556 case Intrinsic::aarch64_sme_read_ver_vg2: {
5557 if (VT == MVT::nxv16i8) {
5558 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5559 AArch64::MOVA_2ZMXI_V_B);
5560 return;
5561 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5562 VT == MVT::nxv8bf16) {
5563 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5564 AArch64::MOVA_2ZMXI_V_H);
5565 return;
5566 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5567 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5568 AArch64::MOVA_2ZMXI_V_S);
5569 return;
5570 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5571 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5572 AArch64::MOVA_2ZMXI_V_D);
5573 return;
5574 }
5575 break;
5576 }
5577 case Intrinsic::aarch64_sme_read_hor_vg4: {
5578 if (VT == MVT::nxv16i8) {
5579 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5580 AArch64::MOVA_4ZMXI_H_B);
5581 return;
5582 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5583 VT == MVT::nxv8bf16) {
5584 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5585 AArch64::MOVA_4ZMXI_H_H);
5586 return;
5587 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5588 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5589 AArch64::MOVA_4ZMXI_H_S);
5590 return;
5591 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5592 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5593 AArch64::MOVA_4ZMXI_H_D);
5594 return;
5595 }
5596 break;
5597 }
5598 case Intrinsic::aarch64_sme_read_ver_vg4: {
5599 if (VT == MVT::nxv16i8) {
5600 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5601 AArch64::MOVA_4ZMXI_V_B);
5602 return;
5603 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5604 VT == MVT::nxv8bf16) {
5605 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5606 AArch64::MOVA_4ZMXI_V_H);
5607 return;
5608 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5609 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5610 AArch64::MOVA_4ZMXI_V_S);
5611 return;
5612 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5613 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5614 AArch64::MOVA_4ZMXI_V_D);
5615 return;
5616 }
5617 break;
5618 }
5619 case Intrinsic::aarch64_sme_read_vg1x2: {
5620 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5621 AArch64::MOVA_VG2_2ZMXI);
5622 return;
5623 }
5624 case Intrinsic::aarch64_sme_read_vg1x4: {
5625 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5626 AArch64::MOVA_VG4_4ZMXI);
5627 return;
5628 }
5629 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5630 if (VT == MVT::nxv16i8) {
5631 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5632 return;
5633 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5634 VT == MVT::nxv8bf16) {
5635 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5636 return;
5637 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5638 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5639 return;
5640 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5641 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5642 return;
5643 }
5644 break;
5645 }
5646 case Intrinsic::aarch64_sme_readz_vert_x2: {
5647 if (VT == MVT::nxv16i8) {
5648 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5649 return;
5650 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5651 VT == MVT::nxv8bf16) {
5652 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5653 return;
5654 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5655 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5656 return;
5657 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5658 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5659 return;
5660 }
5661 break;
5662 }
5663 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5664 if (VT == MVT::nxv16i8) {
5665 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5666 return;
5667 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5668 VT == MVT::nxv8bf16) {
5669 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5670 return;
5671 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5672 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5673 return;
5674 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5675 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5676 return;
5677 }
5678 break;
5679 }
5680 case Intrinsic::aarch64_sme_readz_vert_x4: {
5681 if (VT == MVT::nxv16i8) {
5682 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5683 return;
5684 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5685 VT == MVT::nxv8bf16) {
5686 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5687 return;
5688 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5689 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5690 return;
5691 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5692 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5693 return;
5694 }
5695 break;
5696 }
5697 case Intrinsic::aarch64_sme_readz_x2: {
5698 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5699 AArch64::ZA);
5700 return;
5701 }
5702 case Intrinsic::aarch64_sme_readz_x4: {
5703 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5704 AArch64::ZA);
5705 return;
5706 }
5707 case Intrinsic::swift_async_context_addr: {
5708 SDLoc DL(Node);
5709 SDValue Chain = Node->getOperand(0);
5710 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5711 SDValue Res = SDValue(
5712 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5713 CurDAG->getTargetConstant(8, DL, MVT::i32),
5714 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5715 0);
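// The address of the Swift async context slot is FP - 8 (SUBXri FP, #8),
// i.e. the word immediately below the saved frame pointer.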
5716 ReplaceUses(SDValue(Node, 0), Res);
5717 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5718 CurDAG->RemoveDeadNode(Node);
5719
5720 auto &MF = CurDAG->getMachineFunction();
5721 MF.getFrameInfo().setFrameAddressIsTaken(true);
5722 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5723 return;
5724 }
5725 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5726 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5727 Node->getValueType(0),
5728 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5729 AArch64::LUTI2_4ZTZI_S}))
5730 // Second Immediate must be <= 3:
5731 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5732 return;
5733 }
5734 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5735 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5736 Node->getValueType(0),
5737 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5738 // Second Immediate must be <= 1:
5739 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5740 return;
5741 }
5742 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5743 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5744 Node->getValueType(0),
5745 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5746 AArch64::LUTI2_2ZTZI_S}))
5747 // Second Immediate must be <= 7:
5748 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5749 return;
5750 }
5751 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5752 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5753 Node->getValueType(0),
5754 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5755 AArch64::LUTI4_2ZTZI_S}))
5756 // Second Immediate must be <= 3:
5757 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5758 return;
5759 }
5760 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5761 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5762 return;
5763 }
5764 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5765 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5766 Node->getValueType(0),
5767 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5768 SelectCVTIntrinsicFP8(Node, 2, Opc);
5769 return;
5770 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5771 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5772 Node->getValueType(0),
5773 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5774 SelectCVTIntrinsicFP8(Node, 2, Opc);
5775 return;
5776 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5777 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5778 Node->getValueType(0),
5779 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5780 SelectCVTIntrinsicFP8(Node, 2, Opc);
5781 return;
5782 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5783 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5784 Node->getValueType(0),
5785 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5786 SelectCVTIntrinsicFP8(Node, 2, Opc);
5787 return;
5788 }
5789 } break;
5790 case ISD::INTRINSIC_WO_CHAIN: {
5791 unsigned IntNo = Node->getConstantOperandVal(0);
5792 switch (IntNo) {
5793 default:
5794 break;
5795 case Intrinsic::aarch64_tagp:
5796 SelectTagP(Node);
5797 return;
5798
5799 case Intrinsic::ptrauth_auth:
5800 SelectPtrauthAuth(Node);
5801 return;
5802
5803 case Intrinsic::ptrauth_resign:
5804 SelectPtrauthResign(Node);
5805 return;
5806
5807 case Intrinsic::aarch64_neon_tbl2:
5808 SelectTable(Node, 2,
5809 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5810 false);
5811 return;
5812 case Intrinsic::aarch64_neon_tbl3:
5813 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5814 : AArch64::TBLv16i8Three,
5815 false);
5816 return;
5817 case Intrinsic::aarch64_neon_tbl4:
5818 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5819 : AArch64::TBLv16i8Four,
5820 false);
5821 return;
5822 case Intrinsic::aarch64_neon_tbx2:
5823 SelectTable(Node, 2,
5824 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5825 true);
5826 return;
5827 case Intrinsic::aarch64_neon_tbx3:
5828 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5829 : AArch64::TBXv16i8Three,
5830 true);
5831 return;
5832 case Intrinsic::aarch64_neon_tbx4:
5833 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5834 : AArch64::TBXv16i8Four,
5835 true);
5836 return;
5837 case Intrinsic::aarch64_sve_srshl_single_x2:
5838 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5839 Node->getValueType(0),
5840 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5841 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5842 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5843 return;
5844 case Intrinsic::aarch64_sve_srshl_single_x4:
5845 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5846 Node->getValueType(0),
5847 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5848 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5849 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5850 return;
5851 case Intrinsic::aarch64_sve_urshl_single_x2:
5852 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5853 Node->getValueType(0),
5854 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5855 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5856 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5857 return;
5858 case Intrinsic::aarch64_sve_urshl_single_x4:
5859 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5860 Node->getValueType(0),
5861 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5862 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5863 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5864 return;
5865 case Intrinsic::aarch64_sve_srshl_x2:
5866 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5867 Node->getValueType(0),
5868 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5869 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5870 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5871 return;
5872 case Intrinsic::aarch64_sve_srshl_x4:
5873 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5874 Node->getValueType(0),
5875 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5876 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5877 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5878 return;
5879 case Intrinsic::aarch64_sve_urshl_x2:
5880 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5881 Node->getValueType(0),
5882 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5883 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5884 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5885 return;
5886 case Intrinsic::aarch64_sve_urshl_x4:
5887 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5888 Node->getValueType(0),
5889 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5890 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5891 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5892 return;
5893 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5894 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5895 Node->getValueType(0),
5896 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5897 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5898 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5899 return;
5900 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5901 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5902 Node->getValueType(0),
5903 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5904 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5905 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5906 return;
5907 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5908 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5909 Node->getValueType(0),
5910 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5911 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5912 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5913 return;
5914 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5915 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5916 Node->getValueType(0),
5917 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5918 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5919 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5920 return;
5921 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5922 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5923 Node->getValueType(0),
5924 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5925 AArch64::FSCALE_2ZZ_D}))
5926 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5927 return;
5928 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5929 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5930 Node->getValueType(0),
5931 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5932 AArch64::FSCALE_4ZZ_D}))
5933 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5934 return;
5935 case Intrinsic::aarch64_sme_fp8_scale_x2:
5936 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5937 Node->getValueType(0),
5938 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5939 AArch64::FSCALE_2Z2Z_D}))
5940 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5941 return;
5942 case Intrinsic::aarch64_sme_fp8_scale_x4:
5943 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5944 Node->getValueType(0),
5945 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5946 AArch64::FSCALE_4Z4Z_D}))
5947 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5948 return;
5949 case Intrinsic::aarch64_sve_whilege_x2:
5950 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5951 Node->getValueType(0),
5952 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5953 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5954 SelectWhilePair(Node, Op);
5955 return;
5956 case Intrinsic::aarch64_sve_whilegt_x2:
5957 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5958 Node->getValueType(0),
5959 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5960 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5961 SelectWhilePair(Node, Op);
5962 return;
5963 case Intrinsic::aarch64_sve_whilehi_x2:
5964 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5965 Node->getValueType(0),
5966 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5967 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5968 SelectWhilePair(Node, Op);
5969 return;
5970 case Intrinsic::aarch64_sve_whilehs_x2:
5971 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5972 Node->getValueType(0),
5973 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5974 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5975 SelectWhilePair(Node, Op);
5976 return;
5977 case Intrinsic::aarch64_sve_whilele_x2:
5978 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5979 Node->getValueType(0),
5980 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5981 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5982 SelectWhilePair(Node, Op);
5983 return;
5984 case Intrinsic::aarch64_sve_whilelo_x2:
5985 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5986 Node->getValueType(0),
5987 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5988 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5989 SelectWhilePair(Node, Op);
5990 return;
5991 case Intrinsic::aarch64_sve_whilels_x2:
5992 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5993 Node->getValueType(0),
5994 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5995 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5996 SelectWhilePair(Node, Op);
5997 return;
5998 case Intrinsic::aarch64_sve_whilelt_x2:
5999 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6000 Node->getValueType(0),
6001 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6002 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6003 SelectWhilePair(Node, Op);
6004 return;
6005 case Intrinsic::aarch64_sve_smax_single_x2:
6006 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6007 Node->getValueType(0),
6008 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6009 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6010 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6011 return;
6012 case Intrinsic::aarch64_sve_umax_single_x2:
6013 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6014 Node->getValueType(0),
6015 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6016 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6017 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6018 return;
6019 case Intrinsic::aarch64_sve_fmax_single_x2:
6020 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6021 Node->getValueType(0),
6022 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6023 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6024 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6025 return;
6026 case Intrinsic::aarch64_sve_smax_single_x4:
6027 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6028 Node->getValueType(0),
6029 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6030 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6031 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6032 return;
6033 case Intrinsic::aarch64_sve_umax_single_x4:
6034 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6035 Node->getValueType(0),
6036 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6037 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6038 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6039 return;
6040 case Intrinsic::aarch64_sve_fmax_single_x4:
6041 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6042 Node->getValueType(0),
6043 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6044 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6045 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6046 return;
6047 case Intrinsic::aarch64_sve_smin_single_x2:
6048 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6049 Node->getValueType(0),
6050 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6051 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6052 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6053 return;
6054 case Intrinsic::aarch64_sve_umin_single_x2:
6055 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6056 Node->getValueType(0),
6057 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6058 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6059 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6060 return;
6061 case Intrinsic::aarch64_sve_fmin_single_x2:
6062 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6063 Node->getValueType(0),
6064 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6065 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6066 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6067 return;
6068 case Intrinsic::aarch64_sve_smin_single_x4:
6069 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6070 Node->getValueType(0),
6071 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6072 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6073 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6074 return;
6075 case Intrinsic::aarch64_sve_umin_single_x4:
6076 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6077 Node->getValueType(0),
6078 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6079 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6080 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6081 return;
6082 case Intrinsic::aarch64_sve_fmin_single_x4:
6083 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6084 Node->getValueType(0),
6085 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6086 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6087 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6088 return;
6089 case Intrinsic::aarch64_sve_smax_x2:
6090 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6091 Node->getValueType(0),
6092 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6093 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6094 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6095 return;
6096 case Intrinsic::aarch64_sve_umax_x2:
6097 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6098 Node->getValueType(0),
6099 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6100 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6101 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6102 return;
6103 case Intrinsic::aarch64_sve_fmax_x2:
6104 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6105 Node->getValueType(0),
6106 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6107 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6108 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6109 return;
6110 case Intrinsic::aarch64_sve_smax_x4:
6111 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6112 Node->getValueType(0),
6113 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6114 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6115 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6116 return;
6117 case Intrinsic::aarch64_sve_umax_x4:
6118 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6119 Node->getValueType(0),
6120 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6121 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6122 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6123 return;
6124 case Intrinsic::aarch64_sve_fmax_x4:
6125 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6126 Node->getValueType(0),
6127 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6128 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6129 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6130 return;
6131 case Intrinsic::aarch64_sme_famax_x2:
6132 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6133 Node->getValueType(0),
6134 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6135 AArch64::FAMAX_2Z2Z_D}))
6136 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6137 return;
6138 case Intrinsic::aarch64_sme_famax_x4:
6139 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6140 Node->getValueType(0),
6141 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6142 AArch64::FAMAX_4Z4Z_D}))
6143 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6144 return;
6145 case Intrinsic::aarch64_sme_famin_x2:
6146 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6147 Node->getValueType(0),
6148 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6149 AArch64::FAMIN_2Z2Z_D}))
6150 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6151 return;
6152 case Intrinsic::aarch64_sme_famin_x4:
6153 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6154 Node->getValueType(0),
6155 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6156 AArch64::FAMIN_4Z4Z_D}))
6157 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6158 return;
6159 case Intrinsic::aarch64_sve_smin_x2:
6160 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6161 Node->getValueType(0),
6162 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6163 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6164 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6165 return;
6166 case Intrinsic::aarch64_sve_umin_x2:
6167 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6168 Node->getValueType(0),
6169 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6170 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6171 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6172 return;
6173 case Intrinsic::aarch64_sve_fmin_x2:
6174 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6175 Node->getValueType(0),
6176 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6177 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6178 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6179 return;
6180 case Intrinsic::aarch64_sve_smin_x4:
6181 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6182 Node->getValueType(0),
6183 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6184 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6185 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6186 return;
6187 case Intrinsic::aarch64_sve_umin_x4:
6188 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6189 Node->getValueType(0),
6190 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6191 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6192 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6193 return;
6194 case Intrinsic::aarch64_sve_fmin_x4:
6195 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6196 Node->getValueType(0),
6197 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6198 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6199 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6200 return;
6201 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6202 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6203 Node->getValueType(0),
6204 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6205 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6206 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6207 return;
6208 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6209 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6210 Node->getValueType(0),
6211 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6212 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6213 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6214 return;
6215 case Intrinsic::aarch64_sve_fminnm_single_x2:
6216 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6217 Node->getValueType(0),
6218 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6219 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6220 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6221 return;
6222 case Intrinsic::aarch64_sve_fminnm_single_x4:
6223 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6224 Node->getValueType(0),
6225 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6226 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6227 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6228 return;
6229 case Intrinsic::aarch64_sve_fmaxnm_x2:
6230 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6231 Node->getValueType(0),
6232 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6233 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6234 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6235 return;
6236 case Intrinsic::aarch64_sve_fmaxnm_x4:
6237 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6238 Node->getValueType(0),
6239 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6240 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6241 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6242 return;
6243 case Intrinsic::aarch64_sve_fminnm_x2:
6244 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6245 Node->getValueType(0),
6246 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6247 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6248 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6249 return;
6250 case Intrinsic::aarch64_sve_fminnm_x4:
6251 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6252 Node->getValueType(0),
6253 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6254 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6255 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6256 return;
6257 case Intrinsic::aarch64_sve_fcvtzs_x2:
6258 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6259 return;
6260 case Intrinsic::aarch64_sve_scvtf_x2:
6261 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6262 return;
6263 case Intrinsic::aarch64_sve_fcvtzu_x2:
6264 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6265 return;
6266 case Intrinsic::aarch64_sve_ucvtf_x2:
6267 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6268 return;
6269 case Intrinsic::aarch64_sve_fcvtzs_x4:
6270 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6271 return;
6272 case Intrinsic::aarch64_sve_scvtf_x4:
6273 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6274 return;
6275 case Intrinsic::aarch64_sve_fcvtzu_x4:
6276 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6277 return;
6278 case Intrinsic::aarch64_sve_ucvtf_x4:
6279 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6280 return;
6281 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6282 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6283 return;
6284 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6285 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6286 return;
6287 case Intrinsic::aarch64_sve_sclamp_single_x2:
6288 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6289 Node->getValueType(0),
6290 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6291 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6292 SelectClamp(Node, 2, Op);
6293 return;
6294 case Intrinsic::aarch64_sve_uclamp_single_x2:
6295 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6296 Node->getValueType(0),
6297 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6298 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6299 SelectClamp(Node, 2, Op);
6300 return;
6301 case Intrinsic::aarch64_sve_fclamp_single_x2:
6302 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6303 Node->getValueType(0),
6304 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6305 AArch64::FCLAMP_VG2_2Z2Z_D}))
6306 SelectClamp(Node, 2, Op);
6307 return;
6308 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6309 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6310 return;
6311 case Intrinsic::aarch64_sve_sclamp_single_x4:
6312 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6313 Node->getValueType(0),
6314 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6315 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6316 SelectClamp(Node, 4, Op);
6317 return;
6318 case Intrinsic::aarch64_sve_uclamp_single_x4:
6319 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6320 Node->getValueType(0),
6321 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6322 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6323 SelectClamp(Node, 4, Op);
6324 return;
6325 case Intrinsic::aarch64_sve_fclamp_single_x4:
6326 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6327 Node->getValueType(0),
6328 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6329 AArch64::FCLAMP_VG4_4Z4Z_D}))
6330 SelectClamp(Node, 4, Op);
6331 return;
6332 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6333 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6334 return;
6335 case Intrinsic::aarch64_sve_add_single_x2:
6336 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6337 Node->getValueType(0),
6338 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6339 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6340 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6341 return;
6342 case Intrinsic::aarch64_sve_add_single_x4:
6343 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6344 Node->getValueType(0),
6345 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6346 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6347 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6348 return;
6349 case Intrinsic::aarch64_sve_zip_x2:
6350 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6351 Node->getValueType(0),
6352 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6353 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6354 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6355 return;
6356 case Intrinsic::aarch64_sve_zipq_x2:
6357 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6358 AArch64::ZIP_VG2_2ZZZ_Q);
6359 return;
6360 case Intrinsic::aarch64_sve_zip_x4:
6361 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6362 Node->getValueType(0),
6363 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6364 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6365 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6366 return;
6367 case Intrinsic::aarch64_sve_zipq_x4:
6368 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6369 AArch64::ZIP_VG4_4Z4Z_Q);
6370 return;
6371 case Intrinsic::aarch64_sve_uzp_x2:
6372 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6373 Node->getValueType(0),
6374 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6375 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6376 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6377 return;
6378 case Intrinsic::aarch64_sve_uzpq_x2:
6379 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6380 AArch64::UZP_VG2_2ZZZ_Q);
6381 return;
6382 case Intrinsic::aarch64_sve_uzp_x4:
6383 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6384 Node->getValueType(0),
6385 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6386 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6387 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6388 return;
6389 case Intrinsic::aarch64_sve_uzpq_x4:
6390 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6391 AArch64::UZP_VG4_4Z4Z_Q);
6392 return;
6393 case Intrinsic::aarch64_sve_sel_x2:
6394 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6395 Node->getValueType(0),
6396 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6397 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6398 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6399 return;
6400 case Intrinsic::aarch64_sve_sel_x4:
6401 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6402 Node->getValueType(0),
6403 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6404 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6405 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6406 return;
6407 case Intrinsic::aarch64_sve_frinta_x2:
6408 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6409 return;
6410 case Intrinsic::aarch64_sve_frinta_x4:
6411 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6412 return;
6413 case Intrinsic::aarch64_sve_frintm_x2:
6414 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6415 return;
6416 case Intrinsic::aarch64_sve_frintm_x4:
6417 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6418 return;
6419 case Intrinsic::aarch64_sve_frintn_x2:
6420 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6421 return;
6422 case Intrinsic::aarch64_sve_frintn_x4:
6423 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6424 return;
6425 case Intrinsic::aarch64_sve_frintp_x2:
6426 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6427 return;
6428 case Intrinsic::aarch64_sve_frintp_x4:
6429 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6430 return;
6431 case Intrinsic::aarch64_sve_sunpk_x2:
6432 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6433 Node->getValueType(0),
6434 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6435 AArch64::SUNPK_VG2_2ZZ_D}))
6436 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6437 return;
6438 case Intrinsic::aarch64_sve_uunpk_x2:
6439 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6440 Node->getValueType(0),
6441 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6442 AArch64::UUNPK_VG2_2ZZ_D}))
6443 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6444 return;
6445 case Intrinsic::aarch64_sve_sunpk_x4:
6446 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6447 Node->getValueType(0),
6448 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6449 AArch64::SUNPK_VG4_4Z2Z_D}))
6450 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6451 return;
6452 case Intrinsic::aarch64_sve_uunpk_x4:
6453 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6454 Node->getValueType(0),
6455 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6456 AArch64::UUNPK_VG4_4Z2Z_D}))
6457 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6458 return;
6459 case Intrinsic::aarch64_sve_pext_x2: {
6460 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6461 Node->getValueType(0),
6462 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6463 AArch64::PEXT_2PCI_D}))
6464 SelectPExtPair(Node, Op);
6465 return;
6466 }
6467 }
6468 break;
6469 }
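    // Illustrative note (added to this listing, not in the upstream source):
    // SelectOpcodeFromVT picks one entry from the {B, H, S, D} opcode table
    // based on the element size of the node's result type, so e.g. an
    // @llvm.aarch64.sve.uzp.x2 on nxv8f16 data maps to the _H entry, while a
    // zero entry (as in the unpack tables) marks an element size with no
    // valid instruction.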
6470 case ISD::INTRINSIC_VOID: {
6471 unsigned IntNo = Node->getConstantOperandVal(1);
6472 if (Node->getNumOperands() >= 3)
6473 VT = Node->getOperand(2)->getValueType(0);
6474 switch (IntNo) {
6475 default:
6476 break;
6477 case Intrinsic::aarch64_neon_st1x2: {
6478 if (VT == MVT::v8i8) {
6479 SelectStore(Node, 2, AArch64::ST1Twov8b);
6480 return;
6481 } else if (VT == MVT::v16i8) {
6482 SelectStore(Node, 2, AArch64::ST1Twov16b);
6483 return;
6484 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6485 VT == MVT::v4bf16) {
6486 SelectStore(Node, 2, AArch64::ST1Twov4h);
6487 return;
6488 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6489 VT == MVT::v8bf16) {
6490 SelectStore(Node, 2, AArch64::ST1Twov8h);
6491 return;
6492 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6493 SelectStore(Node, 2, AArch64::ST1Twov2s);
6494 return;
6495 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6496 SelectStore(Node, 2, AArch64::ST1Twov4s);
6497 return;
6498 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6499 SelectStore(Node, 2, AArch64::ST1Twov2d);
6500 return;
6501 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6502 SelectStore(Node, 2, AArch64::ST1Twov1d);
6503 return;
6504 }
6505 break;
6506 }
6507 case Intrinsic::aarch64_neon_st1x3: {
6508 if (VT == MVT::v8i8) {
6509 SelectStore(Node, 3, AArch64::ST1Threev8b);
6510 return;
6511 } else if (VT == MVT::v16i8) {
6512 SelectStore(Node, 3, AArch64::ST1Threev16b);
6513 return;
6514 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6515 VT == MVT::v4bf16) {
6516 SelectStore(Node, 3, AArch64::ST1Threev4h);
6517 return;
6518 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6519 VT == MVT::v8bf16) {
6520 SelectStore(Node, 3, AArch64::ST1Threev8h);
6521 return;
6522 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6523 SelectStore(Node, 3, AArch64::ST1Threev2s);
6524 return;
6525 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6526 SelectStore(Node, 3, AArch64::ST1Threev4s);
6527 return;
6528 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6529 SelectStore(Node, 3, AArch64::ST1Threev2d);
6530 return;
6531 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6532 SelectStore(Node, 3, AArch64::ST1Threev1d);
6533 return;
6534 }
6535 break;
6536 }
6537 case Intrinsic::aarch64_neon_st1x4: {
6538 if (VT == MVT::v8i8) {
6539 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6540 return;
6541 } else if (VT == MVT::v16i8) {
6542 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6543 return;
6544 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6545 VT == MVT::v4bf16) {
6546 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6547 return;
6548 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6549 VT == MVT::v8bf16) {
6550 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6551 return;
6552 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6553 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6554 return;
6555 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6556 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6557 return;
6558 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6559 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6560 return;
6561 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6562 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6563 return;
6564 }
6565 break;
6566 }
6567 case Intrinsic::aarch64_neon_st2: {
6568 if (VT == MVT::v8i8) {
6569 SelectStore(Node, 2, AArch64::ST2Twov8b);
6570 return;
6571 } else if (VT == MVT::v16i8) {
6572 SelectStore(Node, 2, AArch64::ST2Twov16b);
6573 return;
6574 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6575 VT == MVT::v4bf16) {
6576 SelectStore(Node, 2, AArch64::ST2Twov4h);
6577 return;
6578 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6579 VT == MVT::v8bf16) {
6580 SelectStore(Node, 2, AArch64::ST2Twov8h);
6581 return;
6582 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6583 SelectStore(Node, 2, AArch64::ST2Twov2s);
6584 return;
6585 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6586 SelectStore(Node, 2, AArch64::ST2Twov4s);
6587 return;
6588 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6589 SelectStore(Node, 2, AArch64::ST2Twov2d);
6590 return;
6591 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6592 SelectStore(Node, 2, AArch64::ST1Twov1d);
6593 return;
6594 }
6595 break;
6596 }
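      // Illustrative note (added to this listing, not in the upstream source):
      // @llvm.aarch64.neon.st2 on v4i32 data stores a pair of 4 x i32
      // registers and is matched to ST2Twov4s above; the v1i64/v1f64 forms
      // fall back to ST1Twov1d because ST2 has no .1d variant.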
6597 case Intrinsic::aarch64_neon_st3: {
6598 if (VT == MVT::v8i8) {
6599 SelectStore(Node, 3, AArch64::ST3Threev8b);
6600 return;
6601 } else if (VT == MVT::v16i8) {
6602 SelectStore(Node, 3, AArch64::ST3Threev16b);
6603 return;
6604 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6605 VT == MVT::v4bf16) {
6606 SelectStore(Node, 3, AArch64::ST3Threev4h);
6607 return;
6608 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6609 VT == MVT::v8bf16) {
6610 SelectStore(Node, 3, AArch64::ST3Threev8h);
6611 return;
6612 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6613 SelectStore(Node, 3, AArch64::ST3Threev2s);
6614 return;
6615 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6616 SelectStore(Node, 3, AArch64::ST3Threev4s);
6617 return;
6618 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6619 SelectStore(Node, 3, AArch64::ST3Threev2d);
6620 return;
6621 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6622 SelectStore(Node, 3, AArch64::ST1Threev1d);
6623 return;
6624 }
6625 break;
6626 }
6627 case Intrinsic::aarch64_neon_st4: {
6628 if (VT == MVT::v8i8) {
6629 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6630 return;
6631 } else if (VT == MVT::v16i8) {
6632 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6633 return;
6634 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6635 VT == MVT::v4bf16) {
6636 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6637 return;
6638 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6639 VT == MVT::v8bf16) {
6640 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6641 return;
6642 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6643 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6644 return;
6645 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6646 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6647 return;
6648 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6649 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6650 return;
6651 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6652 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6653 return;
6654 }
6655 break;
6656 }
6657 case Intrinsic::aarch64_neon_st2lane: {
6658 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6659 SelectStoreLane(Node, 2, AArch64::ST2i8);
6660 return;
6661 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6662 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6663 SelectStoreLane(Node, 2, AArch64::ST2i16);
6664 return;
6665 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6666 VT == MVT::v2f32) {
6667 SelectStoreLane(Node, 2, AArch64::ST2i32);
6668 return;
6669 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6670 VT == MVT::v1f64) {
6671 SelectStoreLane(Node, 2, AArch64::ST2i64);
6672 return;
6673 }
6674 break;
6675 }
6676 case Intrinsic::aarch64_neon_st3lane: {
6677 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6678 SelectStoreLane(Node, 3, AArch64::ST3i8);
6679 return;
6680 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6681 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6682 SelectStoreLane(Node, 3, AArch64::ST3i16);
6683 return;
6684 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6685 VT == MVT::v2f32) {
6686 SelectStoreLane(Node, 3, AArch64::ST3i32);
6687 return;
6688 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6689 VT == MVT::v1f64) {
6690 SelectStoreLane(Node, 3, AArch64::ST3i64);
6691 return;
6692 }
6693 break;
6694 }
6695 case Intrinsic::aarch64_neon_st4lane: {
6696 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6697 SelectStoreLane(Node, 4, AArch64::ST4i8);
6698 return;
6699 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6700 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6701 SelectStoreLane(Node, 4, AArch64::ST4i16);
6702 return;
6703 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6704 VT == MVT::v2f32) {
6705 SelectStoreLane(Node, 4, AArch64::ST4i32);
6706 return;
6707 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6708 VT == MVT::v1f64) {
6709 SelectStoreLane(Node, 4, AArch64::ST4i64);
6710 return;
6711 }
6712 break;
6713 }
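      // Illustrative note (added to this listing, not in the upstream source):
      // the lane stores key only on the element size, so v2i32 and v4i32 (and
      // their f32 counterparts) both use ST4i32; the lane index itself comes
      // from the intrinsic's operands inside SelectStoreLane.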
6714 case Intrinsic::aarch64_sve_st2q: {
6715 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6716 return;
6717 }
6718 case Intrinsic::aarch64_sve_st3q: {
6719 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6720 return;
6721 }
6722 case Intrinsic::aarch64_sve_st4q: {
6723 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6724 return;
6725 }
6726 case Intrinsic::aarch64_sve_st2: {
6727 if (VT == MVT::nxv16i8) {
6728 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6729 return;
6730 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6731 VT == MVT::nxv8bf16) {
6732 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6733 return;
6734 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6735 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6736 return;
6737 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6738 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6739 return;
6740 }
6741 break;
6742 }
6743 case Intrinsic::aarch64_sve_st3: {
6744 if (VT == MVT::nxv16i8) {
6745 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6746 return;
6747 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6748 VT == MVT::nxv8bf16) {
6749 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6750 return;
6751 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6752 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6753 return;
6754 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6755 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6756 return;
6757 }
6758 break;
6759 }
6760 case Intrinsic::aarch64_sve_st4: {
6761 if (VT == MVT::nxv16i8) {
6762 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6763 return;
6764 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6765 VT == MVT::nxv8bf16) {
6766 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6767 return;
6768 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6769 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6770 return;
6771 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6772 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6773 return;
6774 }
6775 break;
6776 }
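      // Illustrative note (added to this listing, not in the upstream source):
      // SelectPredicatedStore is handed both the register-offset and the
      // immediate-offset opcode (e.g. ST2W and ST2W_IMM for nxv4i32) together
      // with the element scale, and emits whichever form the store's address
      // can be folded into.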
6777 }
6778 break;
6779 }
6780 case AArch64ISD::LD2post: {
6781 if (VT == MVT::v8i8) {
6782 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6783 return;
6784 } else if (VT == MVT::v16i8) {
6785 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6786 return;
6787 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6788 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6789 return;
6790 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6791 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6792 return;
6793 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6794 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6795 return;
6796 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6797 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6798 return;
6799 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6800 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6801 return;
6802 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6803 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6804 return;
6805 }
6806 break;
6807 }
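  // Illustrative note (added to this listing, not in the upstream source): the
  // dsub0/qsub0 argument tells SelectPostLoad which subregister sequence to
  // use when splitting the loaded register tuple back into individual 64-bit
  // (D) or 128-bit (Q) results, e.g. LD2Twov16b_POST is unpacked via qsub0.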
6808 case AArch64ISD::LD3post: {
6809 if (VT == MVT::v8i8) {
6810 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6811 return;
6812 } else if (VT == MVT::v16i8) {
6813 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6814 return;
6815 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6816 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6817 return;
6818 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6819 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6820 return;
6821 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6822 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6823 return;
6824 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6825 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6826 return;
6827 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6828 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6829 return;
6830 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6831 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6832 return;
6833 }
6834 break;
6835 }
6836 case AArch64ISD::LD4post: {
6837 if (VT == MVT::v8i8) {
6838 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6839 return;
6840 } else if (VT == MVT::v16i8) {
6841 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6842 return;
6843 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6844 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6845 return;
6846 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6847 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6848 return;
6849 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6850 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6851 return;
6852 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6853 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6854 return;
6855 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6856 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6857 return;
6858 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6859 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6860 return;
6861 }
6862 break;
6863 }
6864 case AArch64ISD::LD1x2post: {
6865 if (VT == MVT::v8i8) {
6866 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6867 return;
6868 } else if (VT == MVT::v16i8) {
6869 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6870 return;
6871 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6872 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6873 return;
6874 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6875 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6876 return;
6877 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6878 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6879 return;
6880 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6881 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6882 return;
6883 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6884 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6885 return;
6886 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6887 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6888 return;
6889 }
6890 break;
6891 }
6892 case AArch64ISD::LD1x3post: {
6893 if (VT == MVT::v8i8) {
6894 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6895 return;
6896 } else if (VT == MVT::v16i8) {
6897 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6898 return;
6899 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6900 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6901 return;
6902 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6903 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6904 return;
6905 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6906 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6907 return;
6908 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6909 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6910 return;
6911 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6912 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6913 return;
6914 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6915 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6916 return;
6917 }
6918 break;
6919 }
6920 case AArch64ISD::LD1x4post: {
6921 if (VT == MVT::v8i8) {
6922 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6923 return;
6924 } else if (VT == MVT::v16i8) {
6925 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6926 return;
6927 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6928 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6929 return;
6930 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6931 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6932 return;
6933 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6934 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6935 return;
6936 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6937 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6938 return;
6939 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6940 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6941 return;
6942 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6943 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6944 return;
6945 }
6946 break;
6947 }
6948 case AArch64ISD::LD1DUPpost: {
6949 if (VT == MVT::v8i8) {
6950 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6951 return;
6952 } else if (VT == MVT::v16i8) {
6953 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6954 return;
6955 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6956 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6957 return;
6958 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6959 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6960 return;
6961 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6962 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6963 return;
6964 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6965 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6966 return;
6967 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6968 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6969 return;
6970 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6971 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6972 return;
6973 }
6974 break;
6975 }
6976 case AArch64ISD::LD2DUPpost: {
6977 if (VT == MVT::v8i8) {
6978 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6979 return;
6980 } else if (VT == MVT::v16i8) {
6981 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6982 return;
6983 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6984 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6985 return;
6986 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6987 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6988 return;
6989 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6990 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6991 return;
6992 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6993 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6994 return;
6995 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6996 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6997 return;
6998 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6999 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7000 return;
7001 }
7002 break;
7003 }
7004 case AArch64ISD::LD3DUPpost: {
7005 if (VT == MVT::v8i8) {
7006 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7007 return;
7008 } else if (VT == MVT::v16i8) {
7009 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7010 return;
7011 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7012 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7013 return;
7014 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7015 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7016 return;
7017 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7018 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7019 return;
7020 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7021 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7022 return;
7023 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7024 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7025 return;
7026 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7027 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7028 return;
7029 }
7030 break;
7031 }
7032 case AArch64ISD::LD4DUPpost: {
7033 if (VT == MVT::v8i8) {
7034 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7035 return;
7036 } else if (VT == MVT::v16i8) {
7037 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7038 return;
7039 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7040 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7041 return;
7042 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7043 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7044 return;
7045 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7046 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7047 return;
7048 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7049 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7050 return;
7051 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7052 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7053 return;
7054 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7055 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7056 return;
7057 }
7058 break;
7059 }
7060 case AArch64ISD::LD1LANEpost: {
7061 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7062 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7063 return;
7064 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7065 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7066 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7067 return;
7068 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7069 VT == MVT::v2f32) {
7070 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7071 return;
7072 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7073 VT == MVT::v1f64) {
7074 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7075 return;
7076 }
7077 break;
7078 }
7079 case AArch64ISD::LD2LANEpost: {
7080 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7081 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7082 return;
7083 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7084 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7085 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7086 return;
7087 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7088 VT == MVT::v2f32) {
7089 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7090 return;
7091 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7092 VT == MVT::v1f64) {
7093 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7094 return;
7095 }
7096 break;
7097 }
7098 case AArch64ISD::LD3LANEpost: {
7099 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7100 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7101 return;
7102 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7103 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7104 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7105 return;
7106 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7107 VT == MVT::v2f32) {
7108 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7109 return;
7110 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7111 VT == MVT::v1f64) {
7112 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7113 return;
7114 }
7115 break;
7116 }
7117 case AArch64ISD::LD4LANEpost: {
7118 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7119 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7120 return;
7121 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7122 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7123 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7124 return;
7125 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7126 VT == MVT::v2f32) {
7127 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7128 return;
7129 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7130 VT == MVT::v1f64) {
7131 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7132 return;
7133 }
7134 break;
7135 }
7136 case AArch64ISD::ST2post: {
7137 VT = Node->getOperand(1).getValueType();
7138 if (VT == MVT::v8i8) {
7139 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7140 return;
7141 } else if (VT == MVT::v16i8) {
7142 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7143 return;
7144 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7145 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7146 return;
7147 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7148 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7149 return;
7150 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7151 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7152 return;
7153 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7154 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7155 return;
7156 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7157 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7158 return;
7159 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7160 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7161 return;
7162 }
7163 break;
7164 }
7165 case AArch64ISD::ST3post: {
7166 VT = Node->getOperand(1).getValueType();
7167 if (VT == MVT::v8i8) {
7168 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7169 return;
7170 } else if (VT == MVT::v16i8) {
7171 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7172 return;
7173 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7174 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7175 return;
7176 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7177 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7178 return;
7179 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7180 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7181 return;
7182 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7183 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7184 return;
7185 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7186 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7187 return;
7188 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7189 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7190 return;
7191 }
7192 break;
7193 }
7194 case AArch64ISD::ST4post: {
7195 VT = Node->getOperand(1).getValueType();
7196 if (VT == MVT::v8i8) {
7197 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7198 return;
7199 } else if (VT == MVT::v16i8) {
7200 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7201 return;
7202 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7203 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7204 return;
7205 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7206 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7207 return;
7208 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7209 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7210 return;
7211 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7212 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7213 return;
7214 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7215 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7216 return;
7217 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7218 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7219 return;
7220 }
7221 break;
7222 }
7223 case AArch64ISD::ST1x2post: {
7224 VT = Node->getOperand(1).getValueType();
7225 if (VT == MVT::v8i8) {
7226 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7227 return;
7228 } else if (VT == MVT::v16i8) {
7229 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7230 return;
7231 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7232 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7233 return;
7234 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7235 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7236 return;
7237 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7238 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7239 return;
7240 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7241 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7242 return;
7243 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7244 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7245 return;
7246 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7247 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7248 return;
7249 }
7250 break;
7251 }
7252 case AArch64ISD::ST1x3post: {
7253 VT = Node->getOperand(1).getValueType();
7254 if (VT == MVT::v8i8) {
7255 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7256 return;
7257 } else if (VT == MVT::v16i8) {
7258 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7259 return;
7260 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7261 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7262 return;
7263 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7264 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7265 return;
7266 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7267 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7268 return;
7269 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7270 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7271 return;
7272 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7273 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7274 return;
7275 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7276 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7277 return;
7278 }
7279 break;
7280 }
7281 case AArch64ISD::ST1x4post: {
7282 VT = Node->getOperand(1).getValueType();
7283 if (VT == MVT::v8i8) {
7284 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7285 return;
7286 } else if (VT == MVT::v16i8) {
7287 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7288 return;
7289 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7290 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7291 return;
7292 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7293 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7294 return;
7295 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7296 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7297 return;
7298 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7299 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7300 return;
7301 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7302 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7303 return;
7304 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7305 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7306 return;
7307 }
7308 break;
7309 }
7310 case AArch64ISD::ST2LANEpost: {
7311 VT = Node->getOperand(1).getValueType();
7312 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7313 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7314 return;
7315 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7316 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7317 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7318 return;
7319 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7320 VT == MVT::v2f32) {
7321 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7322 return;
7323 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7324 VT == MVT::v1f64) {
7325 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7326 return;
7327 }
7328 break;
7329 }
7330 case AArch64ISD::ST3LANEpost: {
7331 VT = Node->getOperand(1).getValueType();
7332 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7333 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7334 return;
7335 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7336 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7337 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7338 return;
7339 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7340 VT == MVT::v2f32) {
7341 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7342 return;
7343 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7344 VT == MVT::v1f64) {
7345 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7346 return;
7347 }
7348 break;
7349 }
7350 case AArch64ISD::ST4LANEpost: {
7351 VT = Node->getOperand(1).getValueType();
7352 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7353 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7354 return;
7355 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7356 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7357 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7358 return;
7359 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7360 VT == MVT::v2f32) {
7361 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7362 return;
7363 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7364 VT == MVT::v1f64) {
7365 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7366 return;
7367 }
7368 break;
7369 }
7370 }
7371
7372 // Select the default instruction
7373 SelectCode(Node);
7374}
7375
7376/// createAArch64ISelDag - This pass converts a legalized DAG into an
7377/// AArch64-specific DAG, ready for instruction scheduling.
7378FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7379 CodeGenOptLevel OptLevel) {
7380 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7381}
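// Illustrative usage (an assumption added to this listing; it mirrors how the
// AArch64 pass configuration is expected to wire up instruction selection):
//   addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));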
7382
7383/// When \p PredVT is a scalable vector predicate in the form
7384/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7385/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7386/// structured vectors (NumVec > 1), the output data type is
7387/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7388/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7389/// EVT.
7390static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7391 unsigned NumVec) {
7392 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7393 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7394 return EVT();
7395
7396 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7397 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7398 return EVT();
7399
7400 ElementCount EC = PredVT.getVectorElementCount();
7401 EVT ScalarVT =
7402 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7403 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7404
7405 return MemVT;
7406}
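// Worked example (added to this listing, not in the upstream source): for
// PredVT = nxv4i1 the element count is 4, so the element type becomes
// i(128 / 4) = i32 and the result is nxv4i32; with NumVec = 4 the same
// predicate yields nxv16i32.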
7407
7408/// Return the EVT of the data associated with a memory operation in \p
7409/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7410static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7411 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7412 return MemIntr->getMemoryVT();
7413
7414 if (isa<MemSDNode>(Root)) {
7415 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7416
7417 EVT DataVT;
7418 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7419 DataVT = Load->getValueType(0);
7420 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7421 DataVT = Load->getValueType(0);
7422 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7423 DataVT = Store->getValue().getValueType();
7424 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7425 DataVT = Store->getValue().getValueType();
7426 else
7427 llvm_unreachable("Unexpected MemSDNode!");
7428
7429 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7430 }
7431
7432 const unsigned Opcode = Root->getOpcode();
7433 // For custom ISD nodes, we have to look at them individually to extract the
7434 // type of the data moved to/from memory.
7435 switch (Opcode) {
7436 case AArch64ISD::LD1_MERGE_ZERO:
7437 case AArch64ISD::LD1S_MERGE_ZERO:
7438 case AArch64ISD::LDNF1_MERGE_ZERO:
7439 case AArch64ISD::LDNF1S_MERGE_ZERO:
7440 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7441 case AArch64ISD::ST1_PRED:
7442 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7443 default:
7444 break;
7445 }
7446
7447 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7448 return EVT();
7449
7450 switch (Root->getConstantOperandVal(1)) {
7451 default:
7452 return EVT();
7453 case Intrinsic::aarch64_sme_ldr:
7454 case Intrinsic::aarch64_sme_str:
7455 return MVT::nxv16i8;
7456 case Intrinsic::aarch64_sve_prf:
7457 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7458 // width of the predicate.
7459 return getPackedVectorTypeFromPredicateType(
7460 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7461 case Intrinsic::aarch64_sve_ld2_sret:
7462 case Intrinsic::aarch64_sve_ld2q_sret:
7463 return getPackedVectorTypeFromPredicateType(
7464 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7465 case Intrinsic::aarch64_sve_st2q:
7466 return getPackedVectorTypeFromPredicateType(
7467 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7468 case Intrinsic::aarch64_sve_ld3_sret:
7469 case Intrinsic::aarch64_sve_ld3q_sret:
7470 return getPackedVectorTypeFromPredicateType(
7471 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7472 case Intrinsic::aarch64_sve_st3q:
7473 return getPackedVectorTypeFromPredicateType(
7474 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7475 case Intrinsic::aarch64_sve_ld4_sret:
7476 case Intrinsic::aarch64_sve_ld4q_sret:
7477 return getPackedVectorTypeFromPredicateType(
7478 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7479 case Intrinsic::aarch64_sve_st4q:
7480 return getPackedVectorTypeFromPredicateType(
7481 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7482 case Intrinsic::aarch64_sve_ld1udq:
7483 case Intrinsic::aarch64_sve_st1dq:
7484 return EVT(MVT::nxv1i64);
7485 case Intrinsic::aarch64_sve_ld1uwq:
7486 case Intrinsic::aarch64_sve_st1wq:
7487 return EVT(MVT::nxv1i32);
7488 }
7489}
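// Worked example (added to this listing, not in the upstream source): an
// @llvm.aarch64.sve.ld2.sret governed by an nxv16i1 predicate yields
// getPackedVectorTypeFromPredicateType(Ctx, nxv16i1, /*NumVec=*/2) = nxv32i8,
// i.e. the two structured registers viewed as a single wide vector.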
7490
7491/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7492/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7493/// where Root is the memory access using N for its address.
7494template <int64_t Min, int64_t Max>
7495bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7496 SDValue &Base,
7497 SDValue &OffImm) {
7498 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7499 const DataLayout &DL = CurDAG->getDataLayout();
7500 const MachineFrameInfo &MFI = MF->getFrameInfo();
7501
7502 if (N.getOpcode() == ISD::FrameIndex) {
7503 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7504 // We can only encode VL scaled offsets, so only fold in frame indexes
7505 // referencing SVE objects.
7506 if (MFI.hasScalableStackID(FI)) {
7507 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7508 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7509 return true;
7510 }
7511
7512 return false;
7513 }
7514
7515 if (MemVT == EVT())
7516 return false;
7517
7518 if (N.getOpcode() != ISD::ADD)
7519 return false;
7520
7521 SDValue VScale = N.getOperand(1);
7522 int64_t MulImm = std::numeric_limits<int64_t>::max();
7523 if (VScale.getOpcode() == ISD::VSCALE) {
7524 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7525 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7526 int64_t ByteOffset = C->getSExtValue();
7527 const auto KnownVScale =
7528 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7529
7530 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7531 return false;
7532
7533 MulImm = ByteOffset / KnownVScale;
7534 } else
7535 return false;
7536
7537 TypeSize TS = MemVT.getSizeInBits();
7538 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7539
7540 if ((MulImm % MemWidthBytes) != 0)
7541 return false;
7542
7543 int64_t Offset = MulImm / MemWidthBytes;
7544 if (Offset < Min || Offset > Max)
7545 return false;
7546
7547 Base = N.getOperand(0);
7548 if (Base.getOpcode() == ISD::FrameIndex) {
7549 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7550 // We can only encode VL scaled offsets, so only fold in frame indexes
7551 // referencing SVE objects.
7552 if (MFI.hasScalableStackID(FI))
7553 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7554 }
7555
7556 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7557 return true;
7558}
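// Worked example (added to this listing, not in the upstream source): for an
// SVE access of nxv4i32 (MemWidthBytes = 16) whose address is
// "base + vscale * 32", MulImm = 32 gives Offset = 2, so the access is
// selected as [base, #2, mul vl] provided 2 lies within [Min, Max].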
7559
7560/// Select register plus register addressing mode for SVE, with scaled
7561/// offset.
7562bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7563 SDValue &Base,
7564 SDValue &Offset) {
7565 if (N.getOpcode() != ISD::ADD)
7566 return false;
7567
7568 // Process an ADD node.
7569 const SDValue LHS = N.getOperand(0);
7570 const SDValue RHS = N.getOperand(1);
7571
7572 // 8 bit data does not come with the SHL node, so it is treated
7573 // separately.
7574 if (Scale == 0) {
7575 Base = LHS;
7576 Offset = RHS;
7577 return true;
7578 }
7579
7580 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7581 int64_t ImmOff = C->getSExtValue();
7582 unsigned Size = 1 << Scale;
7583
7584 // To use the reg+reg addressing mode, the immediate must be a multiple of
7585 // the vector element's byte size.
7586 if (ImmOff % Size)
7587 return false;
7588
7589 SDLoc DL(N);
7590 Base = LHS;
7591 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7592 SDValue Ops[] = {Offset};
7593 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7594 Offset = SDValue(MI, 0);
7595 return true;
7596 }
7597
7598 // Check if the RHS is a shift node with a constant.
7599 if (RHS.getOpcode() != ISD::SHL)
7600 return false;
7601
7602 const SDValue ShiftRHS = RHS.getOperand(1);
7603 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7604 if (C->getZExtValue() == Scale) {
7605 Base = LHS;
7606 Offset = RHS.getOperand(0);
7607 return true;
7608 }
7609
7610 return false;
7611}
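// Illustrative note (added to this listing, not in the upstream source): with
// Scale = 2 (32-bit elements), an address of the form "add x0, (shl x1, 2)"
// becomes Base = x0, Offset = x1, i.e. the [x0, x1, lsl #2] form; a plain
// constant on the RHS is accepted only if it is a multiple of 4 and is then
// materialised into a register via MOVi64imm.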
7612
7613bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7614 const AArch64TargetLowering *TLI =
7615 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7616
7617 return TLI->isAllActivePredicate(*CurDAG, N);
7618}
7619
7620bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7621 EVT VT = N.getValueType();
7622 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7623}
7624
7625bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7626 SDValue &Base, SDValue &Offset,
7627 unsigned Scale) {
7628 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7629 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7630 int64_t ImmOff = C->getSExtValue();
7631 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7632 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7633 }
7634 return SDValue();
7635 };
7636
7637 if (SDValue C = MatchConstantOffset(N)) {
7638 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7639 Offset = C;
7640 return true;
7641 }
7642
7643 // Try to untangle an ADD node into a 'reg + offset'
7644 if (CurDAG->isBaseWithConstantOffset(N)) {
7645 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7646 Base = N.getOperand(0);
7647 Offset = C;
7648 return true;
7649 }
7650 }
7651
7652 // By default, just match reg + 0.
7653 Base = N;
7654 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7655 return true;
7656}
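// Illustrative note (added to this listing, not in the upstream source):
// with, say, MaxSize = 15 and Scale = 1, a slice index of "x8 + 7" is split
// into Base = x8 and Offset = 7; anything that does not match the constant
// pattern falls back to the whole expression as the base with a zero offset.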
7657
7658bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7659 SDValue &Imm) {
7660 AArch64CC::CondCode CC =
7661 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7662 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7663 // Check conservatively if the immediate fits the valid range [0, 64).
7664 // Immediate variants for GE and HS definitely need to be decremented
7665 // when lowering the pseudos later, so an immediate of 1 would become 0.
7666 // For the inverse conditions LT and LO we don't know for sure if they
7667 // will need a decrement but should the decision be made to reverse the
7668 // branch condition, we again end up with the need to decrement.
7669 // The same argument holds for LE, LS, GT and HI and possibly
7670 // incremented immediates. This can lead to slightly less optimal
7671 // codegen, e.g. we never codegen the legal case
7672 // cblt w0, #63, A
7673 // because we could end up with the illegal case
7674 // cbge w0, #64, B
7675 // should the decision to reverse the branch direction be made. For the
7676 // lower bound cases this is no problem since we can express comparisons
7677 // against 0 with either tbz/tbnz or using wzr/xzr.
7678 uint64_t LowerBound = 0, UpperBound = 64;
7679 switch (CC) {
7680 case AArch64CC::GE:
7681 case AArch64CC::HS:
7682 case AArch64CC::LT:
7683 case AArch64CC::LO:
7684 LowerBound = 1;
7685 break;
7686 case AArch64CC::LE:
7687 case AArch64CC::LS:
7688 case AArch64CC::GT:
7689 case AArch64CC::HI:
7690 UpperBound = 63;
7691 break;
7692 default:
7693 break;
7694 }
7695
7696 if (CN->getAPIntValue().uge(LowerBound) &&
7697 CN->getAPIntValue().ult(UpperBound)) {
7698 SDLoc DL(N);
7699 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7700 return true;
7701 }
7702 }
7703
7704 return false;
7705}
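// Illustrative note (added to this listing, not in the upstream source): for
// CC == GT the accepted range shrinks to [0, 63), so an immediate of 63 is
// rejected even though it is encodable, leaving headroom for the +1 adjustment
// if the branch is later reversed; for CC == GE the lower bound becomes 1 for
// the symmetric -1 case.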
7706
7707template <bool MatchCBB>
7708bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7709 SDValue &ExtType) {
7710
7711 // Use an invalid shift-extend value to indicate we don't need to extend later
7712 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7713 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7714 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7715 return false;
7716 Reg = N.getOperand(0);
7717 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7718 SDLoc(N), MVT::i32);
7719 return true;
7720 }
7721
7722 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7723
7724 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7725 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7726 Reg = N.getOperand(0);
7727 ExtType =
7728 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7729 return true;
7730 }
7731
7732 return false;
7733}
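// Illustrative note (added to this listing, not in the upstream source):
// MatchCBB = true accepts i8 extensions (UXTB/SXTB, or an AssertZext/AssertSext
// to i8) for the byte compare-and-branch forms, while MatchCBB = false accepts
// the corresponding i16 extensions for the halfword forms.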
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1453
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
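A sketch (not from this file) of the SDNode/SDValue accessors above, walking an (and X, (shl Y, C)) pattern; the function and out-parameter names are hypothetical.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static bool matchAndOfShl(SDValue N, SDValue &ShiftInput,
                            uint64_t &ShiftAmt) {
    if (N.getOpcode() != ISD::AND)
      return false;
    SDValue Shl = N.getOperand(1);
    // Only fold when the shift has a single user, so it can disappear
    // once the combined instruction is selected.
    if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse())
      return false;
    if (!isa<ConstantSDNode>(Shl.getOperand(1)))
      return false;
    ShiftInput = Shl.getOperand(0);
    ShiftAmt = Shl.getConstantOperandVal(1);
    return true;
  }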
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruction selectors.
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type, Target opcode, and operands.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
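A sketch (not from this file) of the node-construction helpers above: materializing a 16-bit immediate with a hypothetical helper. It assumes the surrounding file's includes provide the generated AArch64::MOVZWi opcode and isUInt, and the operand order (imm16, shift) mirrors the usual MOVZ pattern.

  static SDNode *emitMovZ32(SelectionDAG &DAG, const SDLoc &DL,
                            uint64_t Imm) {
    assert(isUInt<16>(Imm) && "MOVZ takes a 16-bit immediate");
    // Target constants become plain immediate operands on the MachineInstr.
    SDValue ImmOp = DAG.getTargetConstant(Imm, DL, MVT::i32);
    SDValue Shift = DAG.getTargetConstant(0, DL, MVT::i32); // LSL #0
    return DAG.getMachineNode(AArch64::MOVZWi, DL, MVT::i32, ImmOp, Shift);
  }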
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
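A standalone sketch of StringRef::split as listed above, using a made-up "name:field" string.

  #include "llvm/ADT/StringRef.h"
  #include <cstdio>
  using namespace llvm;

  int main() {
    StringRef S("x17:lo");                // hypothetical input
    auto [Name, Field] = S.split(':');    // splits at the first ':'
    // If the separator is absent, Name is the whole string and Field is "".
    printf("%s | %s\n", Name.str().c_str(), Field.str().c_str());
    return 0;
  }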
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of a logical instruction.
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
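A standalone sketch of the AArch64_AM helpers listed above; the header path is the usual MCTargetDesc location for them in the AArch64 backend.

  #include "MCTargetDesc/AArch64AddressingModes.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    // A repeating 16-bit pattern, so it is a valid AND/ORR/EOR immediate.
    uint64_t Imm = 0x00ff00ff00ff00ffULL;
    assert(AArch64_AM::isLogicalImmediate(Imm, 64));

    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(Imm, 64, Encoding))
      assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Imm);

    // Shifted-register operands pack the shift kind and amount together.
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
    assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL);
    assert(AArch64_AM::getShiftValue(Shifter) == 12);
    return 0;
  }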
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
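A sketch (not from this file) tying the three intrinsic node kinds above together: nodes that carry a chain keep the intrinsic ID in operand 1, chainless ones in operand 0.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static unsigned getIntrinsicID(const SDNode *N) {
    switch (N->getOpcode()) {
    case ISD::INTRINSIC_WO_CHAIN:
      return N->getConstantOperandVal(0);   // [ID, args...]
    case ISD::INTRINSIC_W_CHAIN:
    case ISD::INTRINSIC_VOID:
      return N->getConstantOperandVal(1);   // [chain, ID, args...]
    default:
      return 0;                             // Intrinsic::not_intrinsic
    }
  }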
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
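A sketch (not from this file) of dyn_cast on a DAG operand, combined with the ConstantSDNode/APInt accessors listed earlier; the helper name is hypothetical.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static bool isPowerOfTwoConstant(SDValue Op, unsigned &Log2Val) {
    // dyn_cast yields null when Op is not a ConstantSDNode.
    auto *C = dyn_cast<ConstantSDNode>(Op);
    if (!C || !C->getAPIntValue().isPowerOf2())
      return false;
    Log2Val = C->getAPIntValue().logBase2();
    return true;
  }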
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1968
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version).
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
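A standalone sketch of the bit-manipulation helpers above, splitting a shifted mask into the (shift, width) pair that bitfield-extract selection typically needs.

  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  #include <cstdint>
  using namespace llvm;

  int main() {
    uint64_t Mask = 0x0000000000ff0000ULL;        // ones in bits [16, 24)
    assert(isShiftedMask_64(Mask) && !isMask_64(Mask));
    unsigned Shift = countr_zero(Mask);           // 16
    unsigned Width = countr_one(Mask >> Shift);   // 8
    assert(Shift == 16 && Width == 8);

    assert(maskTrailingOnes<uint64_t>(Width) == 0xff);
    assert(Log2_64(UINT64_C(1) << Shift) == Shift);
    assert(isUInt<8>(Shift + Width));             // fits an immediate field
    return 0;
  }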
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG, ready for instruction scheduling.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
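A sketch (not from this file) of the EVT queries above, built around the scalable type <vscale x 4 x i32>.

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>
  using namespace llvm;

  static void inspectScalableVT(LLVMContext &Ctx) {
    EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true);
    assert(VT.isVector() && VT.isScalableVector());
    assert(VT.getVectorMinNumElements() == 4);
    assert(VT.getScalarSizeInBits() == 32);
    // Known-minimum size: 4 x 32 = 128 bits, multiplied by vscale at run time.
    assert(VT.getSizeInBits().getKnownMinValue() == 128);

    // Same shape, narrower elements: <vscale x 4 x i16>.
    EVT HalfVT = VT.changeVectorElementType(EVT::getIntegerVT(Ctx, 16));
    assert(HalfVT.getScalarSizeInBits() == 16);
  }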
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
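A sketch (not from this file) of computeKnownBits/KnownBits as selection code uses them to prove a masking AND is redundant; it assumes Op is at most 64 bits wide so the mask fits a uint64_t.

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  static bool maskIsRedundant(SelectionDAG &DAG, SDValue Op, uint64_t Mask) {
    KnownBits Known = DAG.computeKnownBits(Op);
    // Bits the mask would clear. If they are all already known zero,
    // (and Op, Mask) computes the same value as Op.
    APInt Cleared = ~APInt(Known.getBitWidth(), Mask);
    return (Known.Zero & Cleared) == Cleared;
  }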
Matching combinators.