1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
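// Illustrative example: for (extract_subvector (v8i16 X), 4) producing a
// v4i16 result, the index equals the number of result elements, so this
// matches the high 64-bit half of the 128-bit source and returns X in Res.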
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
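// Illustrative example: with ShtAmt = 3 this matches
//   (AArch64ISD::VLSHR (add X, (splat 4)), 3)
// because 4 == 1 << (3 - 1), returning Res1 = X and Res2 = #3, i.e. the
// operands of a rounding shift-right by 3.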
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element, that is returned
347 /// unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using a list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516
517 template <bool MatchCBB>
518 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
519};
520
521class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
522public:
523 static char ID;
524 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
525 CodeGenOptLevel OptLevel)
526 : SelectionDAGISelLegacy(
527 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
528};
529} // end anonymous namespace
530
531char AArch64DAGToDAGISelLegacy::ID = 0;
532
533INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
534
535 /// isIntImmediate - This method tests to see if the node is a constant
536 /// operand. If so, Imm will receive the zero-extended value.
537 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
538 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
539 Imm = C->getZExtValue();
540 return true;
541 }
542 return false;
543}
544
545 // isIntImmediate - This method tests to see if N is a constant operand.
546 // If so, Imm will receive the value.
547static bool isIntImmediate(SDValue N, uint64_t &Imm) {
548 return isIntImmediate(N.getNode(), Imm);
549}
550
551// isOpcWithIntImmediate - This method tests to see if the node is a specific
552 // opcode and that it has an immediate integer right operand.
553 // If so, Imm will receive the immediate's value.
554static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
555 uint64_t &Imm) {
556 return N->getOpcode() == Opc &&
557 isIntImmediate(N->getOperand(1).getNode(), Imm);
558}
559
560// isIntImmediateEq - This method tests to see if N is a constant operand that
561// is equivalent to 'ImmExpected'.
562#ifndef NDEBUG
563static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
564 uint64_t Imm;
565 if (!isIntImmediate(N.getNode(), Imm))
566 return false;
567 return Imm == ImmExpected;
568}
569#endif
570
571bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
572 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
573 std::vector<SDValue> &OutOps) {
574 switch(ConstraintID) {
575 default:
576 llvm_unreachable("Unexpected asm memory constraint");
577 case InlineAsm::ConstraintCode::m:
578 case InlineAsm::ConstraintCode::o:
579 case InlineAsm::ConstraintCode::Q:
580 // We need to make sure that this one operand does not end up in XZR, thus
581 // require the address to be in a PointerRegClass register.
582 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
583 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
584 SDLoc dl(Op);
585 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
586 SDValue NewOp =
587 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
588 dl, Op.getValueType(),
589 Op, RC), 0);
590 OutOps.push_back(NewOp);
591 return false;
592 }
593 return true;
594}
595
596/// SelectArithImmed - Select an immediate value that can be represented as
597/// a 12-bit value shifted left by either 0 or 12. If so, return true with
598/// Val set to the 12-bit value and Shift set to the shifter operand.
599bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
600 SDValue &Shift) {
601 // This function is called from the addsub_shifted_imm ComplexPattern,
602 // which lists [imm] as the list of opcodes it's interested in; however,
603 // we still need to check whether the operand is actually an immediate
604 // here because the ComplexPattern opcode list is only used in
605 // root-level opcode matching.
606 if (!isa<ConstantSDNode>(N.getNode()))
607 return false;
608
609 uint64_t Immed = N.getNode()->getAsZExtVal();
610 unsigned ShiftAmt;
611
612 if (Immed >> 12 == 0) {
613 ShiftAmt = 0;
614 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
615 ShiftAmt = 12;
616 Immed = Immed >> 12;
617 } else
618 return false;
619
620 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
621 SDLoc dl(N);
622 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
623 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
624 return true;
625}
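// Worked example (illustrative): #0x123 gives Val = 0x123, Shift = LSL #0;
// #0x123000 gives Val = 0x123, Shift = LSL #12; #0x123456 is rejected because
// it is neither a 12-bit value nor a 12-bit value shifted left by 12.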
626
627/// SelectNegArithImmed - As above, but negates the value before trying to
628/// select it.
629bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
630 SDValue &Shift) {
631 // This function is called from the addsub_shifted_imm ComplexPattern,
632 // which lists [imm] as the list of opcodes it's interested in; however,
633 // we still need to check whether the operand is actually an immediate
634 // here because the ComplexPattern opcode list is only used in
635 // root-level opcode matching.
636 if (!isa<ConstantSDNode>(N.getNode()))
637 return false;
638
639 // The immediate operand must be a 24-bit zero-extended immediate.
640 uint64_t Immed = N.getNode()->getAsZExtVal();
641
642 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
643 // have the opposite effect on the C flag, so this pattern mustn't match under
644 // those circumstances.
645 if (Immed == 0)
646 return false;
647
648 if (N.getValueType() == MVT::i32)
649 Immed = ~((uint32_t)Immed) + 1;
650 else
651 Immed = ~Immed + 1ULL;
652 if (Immed & 0xFFFFFFFFFF000000ULL)
653 return false;
654
655 Immed &= 0xFFFFFFULL;
656 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
657 Shift);
658}
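// Worked example (illustrative): for an i32 immediate of -5 (0xfffffffb) the
// negation yields 5, which SelectArithImmed accepts as Val = 5, Shift = LSL #0.
// An immediate of 0 is rejected so that cmp/cmn #0 keep their distinct effects
// on the C flag.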
659
660/// getShiftTypeForNode - Translate a shift node to the corresponding
661/// ShiftType value.
662 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
663 switch (N.getOpcode()) {
664 default:
665 return AArch64_AM::InvalidShiftExtend;
666 case ISD::SHL:
667 return AArch64_AM::LSL;
668 case ISD::SRL:
669 return AArch64_AM::LSR;
670 case ISD::SRA:
671 return AArch64_AM::ASR;
672 case ISD::ROTR:
673 return AArch64_AM::ROR;
674 }
675}
676
677 static bool isMemOpOrPrefetch(SDNode *N) {
678 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
679}
680
681/// Determine whether it is worth it to fold SHL into the addressing
682/// mode.
683 static bool isWorthFoldingSHL(SDValue V) {
684 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
685 // It is worth folding logical shift of up to three places.
686 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
687 if (!CSD)
688 return false;
689 unsigned ShiftVal = CSD->getZExtValue();
690 if (ShiftVal > 3)
691 return false;
692
693 // Check if this particular node is reused in any non-memory related
694 // operation. If yes, do not try to fold this node into the address
695 // computation, since the computation will be kept.
696 const SDNode *Node = V.getNode();
697 for (SDNode *UI : Node->users())
698 if (!isMemOpOrPrefetch(UI))
699 for (SDNode *UII : UI->users())
700 if (!isMemOpOrPrefetch(UII))
701 return false;
702 return true;
703}
704
705 /// Determine whether it is worth folding V into an extended register
706 /// addressing mode.
707bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
708 // Trivial if we are optimizing for code size or if there is only
709 // one use of the value.
710 if (CurDAG->shouldOptForSize() || V.hasOneUse())
711 return true;
712
713 // If a subtarget has a slow shift, folding a shift into multiple loads
714 // costs additional micro-ops.
715 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
716 return false;
717
718 // Check whether we're going to emit the address arithmetic anyway because
719 // it's used by a non-address operation.
720 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
721 return true;
722 if (V.getOpcode() == ISD::ADD) {
723 const SDValue LHS = V.getOperand(0);
724 const SDValue RHS = V.getOperand(1);
725 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
726 return true;
727 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
728 return true;
729 }
730
731 // It hurts otherwise, since the value will be reused.
732 return false;
733}
734
735/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
736/// to select more shifted register
737bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
738 SDValue &Shift) {
739 EVT VT = N.getValueType();
740 if (VT != MVT::i32 && VT != MVT::i64)
741 return false;
742
743 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
744 return false;
745 SDValue LHS = N.getOperand(0);
746 if (!LHS->hasOneUse())
747 return false;
748
749 unsigned LHSOpcode = LHS->getOpcode();
750 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
751 return false;
752
753 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
754 if (!ShiftAmtNode)
755 return false;
756
757 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
758 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
759 if (!RHSC)
760 return false;
761
762 APInt AndMask = RHSC->getAPIntValue();
763 unsigned LowZBits, MaskLen;
764 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
765 return false;
766
767 unsigned BitWidth = N.getValueSizeInBits();
768 SDLoc DL(LHS);
769 uint64_t NewShiftC;
770 unsigned NewShiftOp;
771 if (LHSOpcode == ISD::SHL) {
772 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
773 // BitWidth != LowZBits + MaskLen doesn't match the pattern
774 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
775 return false;
776
777 NewShiftC = LowZBits - ShiftAmtC;
778 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
779 } else {
780 if (LowZBits == 0)
781 return false;
782
783 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
784 NewShiftC = LowZBits + ShiftAmtC;
785 if (NewShiftC >= BitWidth)
786 return false;
787
788 // SRA need all high bits
789 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
790 return false;
791
792 // SRL high bits can be 0 or 1
793 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
794 return false;
795
796 if (LHSOpcode == ISD::SRL)
797 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
798 else
799 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
800 }
801
802 assert(NewShiftC < BitWidth && "Invalid shift amount");
803 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
804 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
805 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
806 NewShiftAmt, BitWidthMinus1),
807 0);
808 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
809 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
810 return true;
811}
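// Worked example (illustrative): for i32 (and (shl X, 2), 0xFFFFFFF0),
// LowZBits = 4 and MaskLen = 28, so the node is rewritten as
// (shl (srl X, 2), 4): Reg becomes a UBFMWri performing lsr #2 and Shift
// encodes LSL #4.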
812
813/// getExtendTypeForNode - Translate an extend node to the corresponding
814/// ExtendType value.
815 static AArch64_AM::ShiftExtendType
816 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
817 if (N.getOpcode() == ISD::SIGN_EXTEND ||
818 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
819 EVT SrcVT;
820 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
821 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
822 else
823 SrcVT = N.getOperand(0).getValueType();
824
825 if (!IsLoadStore && SrcVT == MVT::i8)
826 return AArch64_AM::SXTB;
827 else if (!IsLoadStore && SrcVT == MVT::i16)
828 return AArch64_AM::SXTH;
829 else if (SrcVT == MVT::i32)
830 return AArch64_AM::SXTW;
831 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
832
833 return AArch64_AM::InvalidShiftExtend;
834 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
835 N.getOpcode() == ISD::ANY_EXTEND) {
836 EVT SrcVT = N.getOperand(0).getValueType();
837 if (!IsLoadStore && SrcVT == MVT::i8)
838 return AArch64_AM::UXTB;
839 else if (!IsLoadStore && SrcVT == MVT::i16)
840 return AArch64_AM::UXTH;
841 else if (SrcVT == MVT::i32)
842 return AArch64_AM::UXTW;
843 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
844
845 return AArch64_AM::InvalidShiftExtend;
846 } else if (N.getOpcode() == ISD::AND) {
847 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
848 if (!CSD)
849 return AArch64_AM::InvalidShiftExtend;
850 uint64_t AndMask = CSD->getZExtValue();
851
852 switch (AndMask) {
853 default:
854 return AArch64_AM::InvalidShiftExtend;
855 case 0xFF:
856 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
857 case 0xFFFF:
858 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
859 case 0xFFFFFFFF:
860 return AArch64_AM::UXTW;
861 }
862 }
863
864 return AArch64_AM::InvalidShiftExtend;
865 }
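// Illustrative mapping: (sign_extend_inreg X, i8) -> SXTB,
// (and X, 0xFFFF) -> UXTH (for non-load/store users), and
// (and X, 0xFFFFFFFF) -> UXTW; anything else yields InvalidShiftExtend.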
866
867 /// Determine whether it is worth folding V into an extended register of an
868 /// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
869 /// instruction, and the shift should be treated as worth folding even if it
870 /// has multiple uses.
871bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
872 // Trivial if we are optimizing for code size or if there is only
873 // one use of the value.
874 if (CurDAG->shouldOptForSize() || V.hasOneUse())
875 return true;
876
877 // If a subtarget has a fastpath LSL we can fold a logical shift into
878 // the add/sub and save a cycle.
879 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
880 V.getConstantOperandVal(1) <= 4 &&
881 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
882 return true;
883
884 // It hurts otherwise, since the value will be reused.
885 return false;
886}
887
888/// SelectShiftedRegister - Select a "shifted register" operand. If the value
889/// is not shifted, set the Shift operand to default of "LSL 0". The logical
890/// instructions allow the shifted register to be rotated, but the arithmetic
891/// instructions do not. The AllowROR parameter specifies whether ROR is
892/// supported.
893bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
894 SDValue &Reg, SDValue &Shift) {
895 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
896 return true;
897
898 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
899 if (ShType == AArch64_AM::InvalidShiftExtend)
900 return false;
901 if (!AllowROR && ShType == AArch64_AM::ROR)
902 return false;
903
904 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
905 unsigned BitSize = N.getValueSizeInBits();
906 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
907 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
908
909 Reg = N.getOperand(0);
910 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
911 return isWorthFoldingALU(N, true);
912 }
913
914 return false;
915}
916
917/// Instructions that accept extend modifiers like UXTW expect the register
918/// being extended to be a GPR32, but the incoming DAG might be acting on a
919/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
920/// this is the case.
921 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
922 if (N.getValueType() == MVT::i32)
923 return N;
924
925 SDLoc dl(N);
926 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
927}
928
929// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
930template<signed Low, signed High, signed Scale>
931bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
932 if (!isa<ConstantSDNode>(N))
933 return false;
934
935 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
936 if ((MulImm % std::abs(Scale)) == 0) {
937 int64_t RDVLImm = MulImm / Scale;
938 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
939 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
940 return true;
941 }
942 }
943
944 return false;
945}
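// Illustrative example (template arguments are hypothetical): with Low = -32,
// High = 31, Scale = 16, a request for VSCALE * 48 gives MulImm = 48 and
// RDVLImm = 3, which is in range and becomes the #3 multiplier of an
// RDVL/ADDVL-style instruction.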
946
947// Returns a suitable RDSVL multiplier from a left shift.
948template <signed Low, signed High>
949bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
950 if (!isa<ConstantSDNode>(N))
951 return false;
952
953 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
954 if (MulImm >= Low && MulImm <= High) {
955 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
956 return true;
957 }
958
959 return false;
960}
961
962/// SelectArithExtendedRegister - Select a "extended register" operand. This
963/// operand folds in an extend followed by an optional left shift.
964bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
965 SDValue &Shift) {
966 unsigned ShiftVal = 0;
967 AArch64_AM::ShiftExtendType Ext;
968
969 if (N.getOpcode() == ISD::SHL) {
970 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
971 if (!CSD)
972 return false;
973 ShiftVal = CSD->getZExtValue();
974 if (ShiftVal > 4)
975 return false;
976
977 Ext = getExtendTypeForNode(N.getOperand(0));
978 if (Ext == AArch64_AM::InvalidShiftExtend)
979 return false;
980
981 Reg = N.getOperand(0).getOperand(0);
982 } else {
983 Ext = getExtendTypeForNode(N);
984 if (Ext == AArch64_AM::InvalidShiftExtend)
985 return false;
986
987 Reg = N.getOperand(0);
988
989 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
990 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
991 auto isDef32 = [](SDValue N) {
992 unsigned Opc = N.getOpcode();
993 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
994 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
995 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
996 Opc != ISD::FREEZE;
997 };
998 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
999 isDef32(Reg))
1000 return false;
1001 }
1002
1003 // AArch64 mandates that the RHS of the operation must use the smallest
1004 // register class that could contain the size being extended from. Thus,
1005 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1006 // there might not be an actual 32-bit value in the program. We can
1007 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1008 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1009 Reg = narrowIfNeeded(CurDAG, Reg);
1010 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1011 MVT::i32);
1012 return isWorthFoldingALU(N);
1013}
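// Illustrative example: (add X, (shl (and Y, 0xFF), 2)) can fold the
// right-hand side as "Y, uxtb #2": Y is narrowed to a GPR32 by narrowIfNeeded
// and Shift encodes the UXTB extend plus left shift by 2.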
1014
1015 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1016 /// operand is used by instructions that take SP as an operand.
1017bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1018 SDValue &Shift) {
1019 unsigned ShiftVal = 0;
1020 AArch64_AM::ShiftExtendType Ext;
1021
1022 if (N.getOpcode() != ISD::SHL)
1023 return false;
1024
1025 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1026 if (!CSD)
1027 return false;
1028 ShiftVal = CSD->getZExtValue();
1029 if (ShiftVal > 4)
1030 return false;
1031
1032 Ext = AArch64_AM::UXTX;
1033 Reg = N.getOperand(0);
1034 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1035 MVT::i32);
1036 return isWorthFoldingALU(N);
1037}
1038
1039/// If there's a use of this ADDlow that's not itself a load/store then we'll
1040/// need to create a real ADD instruction from it anyway and there's no point in
1041/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1042/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1043/// leads to duplicated ADRP instructions.
1045 for (auto *User : N->users()) {
1046 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1047 User->getOpcode() != ISD::ATOMIC_LOAD &&
1048 User->getOpcode() != ISD::ATOMIC_STORE)
1049 return false;
1050
1051 // ldar and stlr have much more restrictive addressing modes (just a
1052 // register).
1053 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1054 return false;
1055 }
1056
1057 return true;
1058}
1059
1060/// Check if the immediate offset is valid as a scaled immediate.
1061static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1062 unsigned Size) {
1063 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1064 Offset < (Range << Log2_32(Size)))
1065 return true;
1066 return false;
1067}
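// Worked example (illustrative): with Range = 0x1000 and Size = 8, valid
// offsets are the multiples of 8 in [0, 32760], i.e. exactly what a scaled
// unsigned 12-bit immediate can address for 8-byte accesses.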
1068
1069/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1070/// immediate" address. The "Size" argument is the size in bytes of the memory
1071/// reference, which determines the scale.
1072bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1073 unsigned BW, unsigned Size,
1074 SDValue &Base,
1075 SDValue &OffImm) {
1076 SDLoc dl(N);
1077 const DataLayout &DL = CurDAG->getDataLayout();
1078 const TargetLowering *TLI = getTargetLowering();
1079 if (N.getOpcode() == ISD::FrameIndex) {
1080 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1081 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1082 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1083 return true;
1084 }
1085
1086 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1087 // modes selected here don't support labels/immediates, only base+offset.
1088 if (CurDAG->isBaseWithConstantOffset(N)) {
1089 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1090 if (IsSignedImm) {
1091 int64_t RHSC = RHS->getSExtValue();
1092 unsigned Scale = Log2_32(Size);
1093 int64_t Range = 0x1LL << (BW - 1);
1094
1095 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1096 RHSC < (Range << Scale)) {
1097 Base = N.getOperand(0);
1098 if (Base.getOpcode() == ISD::FrameIndex) {
1099 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1100 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1101 }
1102 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1103 return true;
1104 }
1105 } else {
1106 // unsigned Immediate
1107 uint64_t RHSC = RHS->getZExtValue();
1108 unsigned Scale = Log2_32(Size);
1109 uint64_t Range = 0x1ULL << BW;
1110
1111 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1112 Base = N.getOperand(0);
1113 if (Base.getOpcode() == ISD::FrameIndex) {
1114 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1115 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1116 }
1117 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1118 return true;
1119 }
1120 }
1121 }
1122 }
1123 // Base only. The address will be materialized into a register before
1124 // the memory is accessed.
1125 // add x0, Xbase, #offset
1126 // stp x1, x2, [x0]
1127 Base = N;
1128 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1129 return true;
1130}
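// Worked example (illustrative): for a signed 7-bit immediate with Size = 8
// (e.g. an LDP/STP of X registers), byte offsets must be multiples of 8 in
// [-512, 504]; an offset of -512 is emitted as OffImm = #-64.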
1131
1132/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1133/// immediate" address. The "Size" argument is the size in bytes of the memory
1134/// reference, which determines the scale.
1135bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1136 SDValue &Base, SDValue &OffImm) {
1137 SDLoc dl(N);
1138 const DataLayout &DL = CurDAG->getDataLayout();
1139 const TargetLowering *TLI = getTargetLowering();
1140 if (N.getOpcode() == ISD::FrameIndex) {
1141 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1142 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1143 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1144 return true;
1145 }
1146
1147 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1148 GlobalAddressSDNode *GAN =
1149 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1150 Base = N.getOperand(0);
1151 OffImm = N.getOperand(1);
1152 if (!GAN)
1153 return true;
1154
1155 if (GAN->getOffset() % Size == 0 &&
1156 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1157 return true;
1158 }
1159
1160 if (CurDAG->isBaseWithConstantOffset(N)) {
1161 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1162 int64_t RHSC = (int64_t)RHS->getZExtValue();
1163 unsigned Scale = Log2_32(Size);
1164 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1169 }
1170 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1171 return true;
1172 }
1173 }
1174 }
1175
1176 // Before falling back to our general case, check if the unscaled
1177 // instructions can handle this. If so, that's preferable.
1178 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1179 return false;
1180
1181 // Base only. The address will be materialized into a register before
1182 // the memory is accessed.
1183 // add x0, Xbase, #offset
1184 // ldr x0, [x0]
1185 Base = N;
1186 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1187 return true;
1188}
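// Worked example (illustrative): for (load (add x0, #16380)) with Size = 4,
// the offset is a multiple of 4 below 0x1000 * 4, so Base = x0 and
// OffImm = #4095, i.e. "ldr w1, [x0, #16380]".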
1189
1190/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1191/// immediate" address. This should only match when there is an offset that
1192/// is not valid for a scaled immediate addressing mode. The "Size" argument
1193/// is the size in bytes of the memory reference, which is needed here to know
1194/// what is valid for a scaled immediate.
1195bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1196 SDValue &Base,
1197 SDValue &OffImm) {
1198 if (!CurDAG->isBaseWithConstantOffset(N))
1199 return false;
1200 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1201 int64_t RHSC = RHS->getSExtValue();
1202 if (RHSC >= -256 && RHSC < 256) {
1203 Base = N.getOperand(0);
1204 if (Base.getOpcode() == ISD::FrameIndex) {
1205 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1206 const TargetLowering *TLI = getTargetLowering();
1207 Base = CurDAG->getTargetFrameIndex(
1208 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1209 }
1210 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1211 return true;
1212 }
1213 }
1214 return false;
1215}
1216
1217 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1218 SDLoc dl(N);
1219 SDValue ImpDef = SDValue(
1220 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1221 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1222 N);
1223}
1224
1225/// Check if the given SHL node (\p N), can be used to form an
1226/// extended register for an addressing mode.
1227bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1228 bool WantExtend, SDValue &Offset,
1229 SDValue &SignExtend) {
1230 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1231 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1232 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1233 return false;
1234
1235 SDLoc dl(N);
1236 if (WantExtend) {
1237 AArch64_AM::ShiftExtendType Ext =
1238 getExtendTypeForNode(N.getOperand(0), true);
1239 if (Ext == AArch64_AM::InvalidShiftExtend)
1240 return false;
1241
1242 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1243 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1244 MVT::i32);
1245 } else {
1246 Offset = N.getOperand(0);
1247 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1248 }
1249
1250 unsigned LegalShiftVal = Log2_32(Size);
1251 unsigned ShiftVal = CSD->getZExtValue();
1252
1253 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1254 return false;
1255
1256 return isWorthFoldingAddr(N, Size);
1257}
1258
1259bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1260 SDValue &Base, SDValue &Offset,
1261 SDValue &SignExtend,
1262 SDValue &DoShift) {
1263 if (N.getOpcode() != ISD::ADD)
1264 return false;
1265 SDValue LHS = N.getOperand(0);
1266 SDValue RHS = N.getOperand(1);
1267 SDLoc dl(N);
1268
1269 // We don't want to match immediate adds here, because they are better lowered
1270 // to the register-immediate addressing modes.
1271 if (isa<ConstantSDNode>(RHS))
1272 return false;
1273
1274 // Check if this particular node is reused in any non-memory related
1275 // operation. If yes, do not try to fold this node into the address
1276 // computation, since the computation will be kept.
1277 const SDNode *Node = N.getNode();
1278 for (SDNode *UI : Node->users()) {
1279 if (!isMemOpOrPrefetch(UI))
1280 return false;
1281 }
1282
1283 // Remember if it is worth folding N when it produces extended register.
1284 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1285
1286 // Try to match a shifted extend on the RHS.
1287 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1288 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1289 Base = LHS;
1290 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1291 return true;
1292 }
1293
1294 // Try to match a shifted extend on the LHS.
1295 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1296 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1297 Base = RHS;
1298 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1299 return true;
1300 }
1301
1302 // There was no shift, whatever else we find.
1303 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1304
1305 AArch64_AM::ShiftExtendType Ext;
1306 // Try to match an unshifted extend on the LHS.
1307 if (IsExtendedRegisterWorthFolding &&
1308 (Ext = getExtendTypeForNode(LHS, true)) !=
1309 AArch64_AM::InvalidShiftExtend) {
1310 Base = RHS;
1311 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1312 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1313 MVT::i32);
1314 if (isWorthFoldingAddr(LHS, Size))
1315 return true;
1316 }
1317
1318 // Try to match an unshifted extend on the RHS.
1319 if (IsExtendedRegisterWorthFolding &&
1320 (Ext = getExtendTypeForNode(RHS, true)) !=
1321 AArch64_AM::InvalidShiftExtend) {
1322 Base = LHS;
1323 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1324 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1325 MVT::i32);
1326 if (isWorthFoldingAddr(RHS, Size))
1327 return true;
1328 }
1329
1330 return false;
1331}
1332
1333// Check if the given immediate is preferred by ADD. If an immediate can be
1334// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1335// encoded by one MOVZ, return true.
1336static bool isPreferredADD(int64_t ImmOff) {
1337 // Constant in [0x0, 0xfff] can be encoded in ADD.
1338 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1339 return true;
1340 // Check if it can be encoded in an "ADD LSL #12".
1341 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1342 // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
1343 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1344 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1345 return false;
1346}
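// Worked examples (illustrative): 0xfff is encodable directly in an ADD, so it
// is preferred; 0x123000 needs "ADD ... LSL #12" but cannot be built with a
// single MOVZ, so it is also preferred; 0x450000 fits a single MOVZ (0x45
// shifted by 16), so the function returns false for it.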
1347
1348bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1349 SDValue &Base, SDValue &Offset,
1350 SDValue &SignExtend,
1351 SDValue &DoShift) {
1352 if (N.getOpcode() != ISD::ADD)
1353 return false;
1354 SDValue LHS = N.getOperand(0);
1355 SDValue RHS = N.getOperand(1);
1356 SDLoc DL(N);
1357
1358 // Check if this particular node is reused in any non-memory related
1359 // operation. If yes, do not try to fold this node into the address
1360 // computation, since the computation will be kept.
1361 const SDNode *Node = N.getNode();
1362 for (SDNode *UI : Node->users()) {
1363 if (!isMemOpOrPrefetch(UI))
1364 return false;
1365 }
1366
1367 // Watch out if RHS is a wide immediate: it cannot be selected into the
1368 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
1369 // either. In that case the [BaseReg + 0] address mode is used, generating
1370 // instructions like:
1371 // MOV X0, WideImmediate
1372 // ADD X1, BaseReg, X0
1373 // LDR X2, [X1, 0]
1374 // For such situation, using [BaseReg, XReg] addressing mode can save one
1375 // ADD/SUB:
1376 // MOV X0, WideImmediate
1377 // LDR X2, [BaseReg, X0]
1378 if (isa<ConstantSDNode>(RHS)) {
1379 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1380 // Skip immediates that can be selected by a load/store addressing mode.
1381 // Also skip immediates that can be encoded by a single ADD (SUB is also
1382 // checked by using -ImmOff).
1383 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1384 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1385 return false;
1386
1387 SDValue Ops[] = { RHS };
1388 SDNode *MOVI =
1389 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1390 SDValue MOVIV = SDValue(MOVI, 0);
1391 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1392 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1393 }
1394
1395 // Remember if it is worth folding N when it produces extended register.
1396 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1397
1398 // Try to match a shifted extend on the RHS.
1399 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1400 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1401 Base = LHS;
1402 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1403 return true;
1404 }
1405
1406 // Try to match a shifted extend on the LHS.
1407 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1408 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1409 Base = RHS;
1410 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1411 return true;
1412 }
1413
1414 // Match any non-shifted, non-extend, non-immediate add expression.
1415 Base = LHS;
1416 Offset = RHS;
1417 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1418 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1419 // Reg1 + Reg2 is free: no check needed.
1420 return true;
1421}
1422
1423SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1424 static const unsigned RegClassIDs[] = {
1425 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1426 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1427 AArch64::dsub2, AArch64::dsub3};
1428
1429 return createTuple(Regs, RegClassIDs, SubRegs);
1430}
1431
1432SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1433 static const unsigned RegClassIDs[] = {
1434 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1435 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1436 AArch64::qsub2, AArch64::qsub3};
1437
1438 return createTuple(Regs, RegClassIDs, SubRegs);
1439}
1440
1441SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1442 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1443 AArch64::ZPR3RegClassID,
1444 AArch64::ZPR4RegClassID};
1445 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1446 AArch64::zsub2, AArch64::zsub3};
1447
1448 return createTuple(Regs, RegClassIDs, SubRegs);
1449}
1450
1451SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1452 assert(Regs.size() == 2 || Regs.size() == 4);
1453
1454 // The createTuple interface requires 3 RegClassIDs for each possible
1455 // tuple type even though we only have them for ZPR2 and ZPR4.
1456 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1457 AArch64::ZPR4Mul4RegClassID};
1458 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1459 AArch64::zsub2, AArch64::zsub3};
1460 return createTuple(Regs, RegClassIDs, SubRegs);
1461}
1462
1463SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1464 const unsigned RegClassIDs[],
1465 const unsigned SubRegs[]) {
1466 // There's no special register-class for a vector-list of 1 element: it's just
1467 // a vector.
1468 if (Regs.size() == 1)
1469 return Regs[0];
1470
1471 assert(Regs.size() >= 2 && Regs.size() <= 4);
1472
1473 SDLoc DL(Regs[0]);
1474
1475 SmallVector<SDValue, 4> Ops;
1476
1477 // First operand of REG_SEQUENCE is the desired RegClass.
1478 Ops.push_back(
1479 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1480
1481 // Then we get pairs of source & subregister-position for the components.
1482 for (unsigned i = 0; i < Regs.size(); ++i) {
1483 Ops.push_back(Regs[i]);
1484 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1485 }
1486
1487 SDNode *N =
1488 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1489 return SDValue(N, 0);
1490}
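// Illustrative example: createQTuple with two Q-register values builds
//   REG_SEQUENCE QQRegClass, <vec0>, qsub0, <vec1>, qsub1
// producing a single untyped value that patterns expecting a two-register
// vector list can consume.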
1491
1492void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1493 bool isExt) {
1494 SDLoc dl(N);
1495 EVT VT = N->getValueType(0);
1496
1497 unsigned ExtOff = isExt;
1498
1499 // Form a REG_SEQUENCE to force register allocation.
1500 unsigned Vec0Off = ExtOff + 1;
1501 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1502 SDValue RegSeq = createQTuple(Regs);
1503
1504 SmallVector<SDValue, 6> Ops;
1505 if (isExt)
1506 Ops.push_back(N->getOperand(1));
1507 Ops.push_back(RegSeq);
1508 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1509 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1510}
1511
1512static std::tuple<SDValue, SDValue>
1513 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1514 SDLoc DL(Disc);
1515 SDValue AddrDisc;
1516 SDValue ConstDisc;
1517
1518 // If this is a blend, remember the constant and address discriminators.
1519 // Otherwise, it's either a constant discriminator, or a non-blended
1520 // address discriminator.
1521 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1522 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1523 AddrDisc = Disc->getOperand(1);
1524 ConstDisc = Disc->getOperand(2);
1525 } else {
1526 ConstDisc = Disc;
1527 }
1528
1529 // If the constant discriminator (either the blend RHS, or the entire
1530 // discriminator value) isn't a 16-bit constant, bail out, and let the
1531 // discriminator be computed separately.
1532 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1533 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1534 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1535
1536 // If there's no address discriminator, use XZR directly.
1537 if (!AddrDisc)
1538 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1539
1540 return std::make_tuple(
1541 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1542 AddrDisc);
1543}
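// Illustrative behaviour: for a @llvm.ptrauth.blend(%addr, 1234) discriminator
// this returns (#1234, %addr); for a plain constant 42 it returns (#42, XZR);
// for any other discriminator it returns (#0, Disc) so the value is computed
// separately.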
1544
1545void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1546 SDLoc DL(N);
1547 // IntrinsicID is operand #0
1548 SDValue Val = N->getOperand(1);
1549 SDValue AUTKey = N->getOperand(2);
1550 SDValue AUTDisc = N->getOperand(3);
1551
1552 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1553 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1554
1555 SDValue AUTAddrDisc, AUTConstDisc;
1556 std::tie(AUTConstDisc, AUTAddrDisc) =
1557 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1558
1559 if (!Subtarget->isX16X17Safer()) {
1560 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1561 // Copy deactivation symbol if present.
1562 if (N->getNumOperands() > 4)
1563 Ops.push_back(N->getOperand(4));
1564
1565 SDNode *AUT =
1566 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1567 ReplaceNode(N, AUT);
1568 } else {
1569 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1570 AArch64::X16, Val, SDValue());
1571 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1572
1573 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1574 ReplaceNode(N, AUT);
1575 }
1576}
1577
1578void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1579 SDLoc DL(N);
1580 // IntrinsicID is operand #0
1581 SDValue Val = N->getOperand(1);
1582 SDValue AUTKey = N->getOperand(2);
1583 SDValue AUTDisc = N->getOperand(3);
1584 SDValue PACKey = N->getOperand(4);
1585 SDValue PACDisc = N->getOperand(5);
1586
1587 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1588 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1589
1590 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1591 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1592
1593 SDValue AUTAddrDisc, AUTConstDisc;
1594 std::tie(AUTConstDisc, AUTAddrDisc) =
1595 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1596
1597 SDValue PACAddrDisc, PACConstDisc;
1598 std::tie(PACConstDisc, PACAddrDisc) =
1599 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1600
1601 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1602 AArch64::X16, Val, SDValue());
1603
1604 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1605 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1606
1607 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1608 ReplaceNode(N, AUTPAC);
1609}
1610
1611bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1612 LoadSDNode *LD = cast<LoadSDNode>(N);
1613 if (LD->isUnindexed())
1614 return false;
1615 EVT VT = LD->getMemoryVT();
1616 EVT DstVT = N->getValueType(0);
1617 ISD::MemIndexedMode AM = LD->getAddressingMode();
1618 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1619 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1620 int OffsetVal = (int)OffsetOp->getZExtValue();
1621
1622 // We're not doing validity checking here. That was done when checking
1623 // if we should mark the load as indexed or not. We're just selecting
1624 // the right instruction.
1625 unsigned Opcode = 0;
1626
1627 ISD::LoadExtType ExtType = LD->getExtensionType();
1628 bool InsertTo64 = false;
1629 if (VT == MVT::i64)
1630 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1631 else if (VT == MVT::i32) {
1632 if (ExtType == ISD::NON_EXTLOAD)
1633 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1634 else if (ExtType == ISD::SEXTLOAD)
1635 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1636 else {
1637 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1638 InsertTo64 = true;
1639 // The result of the load is only i32. It's the subreg_to_reg that makes
1640 // it into an i64.
1641 DstVT = MVT::i32;
1642 }
1643 } else if (VT == MVT::i16) {
1644 if (ExtType == ISD::SEXTLOAD) {
1645 if (DstVT == MVT::i64)
1646 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1647 else
1648 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1649 } else {
1650 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1651 InsertTo64 = DstVT == MVT::i64;
1652 // The result of the load is only i32. It's the subreg_to_reg that makes
1653 // it into an i64.
1654 DstVT = MVT::i32;
1655 }
1656 } else if (VT == MVT::i8) {
1657 if (ExtType == ISD::SEXTLOAD) {
1658 if (DstVT == MVT::i64)
1659 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1660 else
1661 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1662 } else {
1663 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1664 InsertTo64 = DstVT == MVT::i64;
1665 // The result of the load is only i32. It's the subreg_to_reg that makes
1666 // it into an i64.
1667 DstVT = MVT::i32;
1668 }
1669 } else if (VT == MVT::f16) {
1670 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1671 } else if (VT == MVT::bf16) {
1672 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1673 } else if (VT == MVT::f32) {
1674 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1675 } else if (VT == MVT::f64 ||
1676 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1677 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1678 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1679 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1680 } else if (VT.is64BitVector()) {
1681 if (IsPre || OffsetVal != 8)
1682 return false;
1683 switch (VT.getScalarSizeInBits()) {
1684 case 8:
1685 Opcode = AArch64::LD1Onev8b_POST;
1686 break;
1687 case 16:
1688 Opcode = AArch64::LD1Onev4h_POST;
1689 break;
1690 case 32:
1691 Opcode = AArch64::LD1Onev2s_POST;
1692 break;
1693 case 64:
1694 Opcode = AArch64::LD1Onev1d_POST;
1695 break;
1696 default:
1697 llvm_unreachable("Expected vector element to be a power of 2");
1698 }
1699 } else if (VT.is128BitVector()) {
1700 if (IsPre || OffsetVal != 16)
1701 return false;
1702 switch (VT.getScalarSizeInBits()) {
1703 case 8:
1704 Opcode = AArch64::LD1Onev16b_POST;
1705 break;
1706 case 16:
1707 Opcode = AArch64::LD1Onev8h_POST;
1708 break;
1709 case 32:
1710 Opcode = AArch64::LD1Onev4s_POST;
1711 break;
1712 case 64:
1713 Opcode = AArch64::LD1Onev2d_POST;
1714 break;
1715 default:
1716 llvm_unreachable("Expected vector element to be a power of 2");
1717 }
1718 } else
1719 return false;
1720 SDValue Chain = LD->getChain();
1721 SDValue Base = LD->getBasePtr();
1722 SDLoc dl(N);
1723 // LD1 encodes an immediate offset by using XZR as the offset register.
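// (The post-indexed LD1 forms can only advance the base by the transfer size,
// which is why OffsetVal was required to be exactly 8 or 16 above.)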
1724 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1725 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1726 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1727 SDValue Ops[] = { Base, Offset, Chain };
1728 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1729 MVT::Other, Ops);
1730
1731 // Transfer memoperands.
1732 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1734
1735 // Either way, we're replacing the node, so tell the caller that.
1736 SDValue LoadedVal = SDValue(Res, 1);
1737 if (InsertTo64) {
1738 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1739 LoadedVal =
1740 SDValue(CurDAG->getMachineNode(
1741 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1742 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1743 SubReg),
1744 0);
1745 }
1746
1747 ReplaceUses(SDValue(N, 0), LoadedVal);
1748 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1749 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1750 CurDAG->RemoveDeadNode(N);
1751 return true;
1752}
1753
1754void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1755 unsigned SubRegIdx) {
1756 SDLoc dl(N);
1757 EVT VT = N->getValueType(0);
1758 SDValue Chain = N->getOperand(0);
1759
1760 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1761 Chain};
1762
1763 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1764
1765 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1766 SDValue SuperReg = SDValue(Ld, 0);
1767 for (unsigned i = 0; i < NumVecs; ++i)
1768 ReplaceUses(SDValue(N, i),
1769 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1770
1771 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1772
1773 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1774 // because it's too simple to have needed special treatment during lowering.
1775 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1776 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1778 }
1779
1780 CurDAG->RemoveDeadNode(N);
1781}
1782
1783void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1784 unsigned Opc, unsigned SubRegIdx) {
1785 SDLoc dl(N);
1786 EVT VT = N->getValueType(0);
1787 SDValue Chain = N->getOperand(0);
1788
1789 SDValue Ops[] = {N->getOperand(1), // Mem operand
1790 N->getOperand(2), // Incremental
1791 Chain};
1792
1793 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1794 MVT::Untyped, MVT::Other};
1795
1796 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1797
1798 // Update uses of write back register
1799 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1800
1801 // Update uses of vector list
1802 SDValue SuperReg = SDValue(Ld, 1);
1803 if (NumVecs == 1)
1804 ReplaceUses(SDValue(N, 0), SuperReg);
1805 else
1806 for (unsigned i = 0; i < NumVecs; ++i)
1807 ReplaceUses(SDValue(N, i),
1808 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1809
1810 // Update the chain
1811 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1812 CurDAG->RemoveDeadNode(N);
1813}
1814
1815/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1816/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1817/// new Base and an SDValue representing the new offset.
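/// For illustration: a base that adds a small multiple of the vector length to
/// x0 can be selected as the reg+imm form [x0, #3, mul vl], whereas
/// (add x0, x1) falls back to the reg+reg form [x0, x1, lsl #Scale].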
1818std::tuple<unsigned, SDValue, SDValue>
1819AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1820 unsigned Opc_ri,
1821 const SDValue &OldBase,
1822 const SDValue &OldOffset,
1823 unsigned Scale) {
1824 SDValue NewBase = OldBase;
1825 SDValue NewOffset = OldOffset;
1826 // Detect a possible Reg+Imm addressing mode.
1827 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1828 N, OldBase, NewBase, NewOffset);
1829
1830 // Detect a possible reg+reg addressing mode, but only if we haven't already
1831 // detected a Reg+Imm one.
1832 const bool IsRegReg =
1833 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1834
1835 // Select the instruction.
1836 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1837}
1838
1839enum class SelectTypeKind {
1840 Int1 = 0,
1841 Int = 1,
1842 FP = 2,
1844};
1845
1846/// This function selects an opcode from a list of opcodes, which is
1847/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1848/// element types, in this order.
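/// For example, for nxv8i16 (a minimum of 8 elements) Key is 8, which maps to
/// Offset 1 below, so the 16-bit-element opcode is returned.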
1849template <SelectTypeKind Kind>
1850static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1851 // Only match scalable vector VTs
1852 if (!VT.isScalableVector())
1853 return 0;
1854
1855 EVT EltVT = VT.getVectorElementType();
1856 unsigned Key = VT.getVectorMinNumElements();
1857 switch (Kind) {
1858 case SelectTypeKind::AnyType:
1859 break;
1860 case SelectTypeKind::Int:
1861 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1862 EltVT != MVT::i64)
1863 return 0;
1864 break;
1865 case SelectTypeKind::Int1:
1866 if (EltVT != MVT::i1)
1867 return 0;
1868 break;
1869 case SelectTypeKind::FP:
1870 if (EltVT == MVT::bf16)
1871 Key = 16;
1872 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1873 EltVT != MVT::f64)
1874 return 0;
1875 break;
1876 }
1877
1878 unsigned Offset;
1879 switch (Key) {
1880 case 16: // 8-bit or bf16
1881 Offset = 0;
1882 break;
1883 case 8: // 16-bit
1884 Offset = 1;
1885 break;
1886 case 4: // 32-bit
1887 Offset = 2;
1888 break;
1889 case 2: // 64-bit
1890 Offset = 3;
1891 break;
1892 default:
1893 return 0;
1894 }
1895
1896 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1897}
1898
1899// This function is almost identical to SelectWhilePair, but has an
1900// extra check on the range of the immediate operand.
1901// TODO: Merge these two functions together at some point?
1902void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1903 // Immediate can be either 0 or 1.
1904 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1905 if (Imm->getZExtValue() > 1)
1906 return;
1907
1908 SDLoc DL(N);
1909 EVT VT = N->getValueType(0);
1910 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1911 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1912 SDValue SuperReg = SDValue(WhilePair, 0);
1913
1914 for (unsigned I = 0; I < 2; ++I)
1915 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1916 AArch64::psub0 + I, DL, VT, SuperReg));
1917
1918 CurDAG->RemoveDeadNode(N);
1919}
1920
1921void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1922 SDLoc DL(N);
1923 EVT VT = N->getValueType(0);
1924
1925 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1926
1927 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1928 SDValue SuperReg = SDValue(WhilePair, 0);
1929
1930 for (unsigned I = 0; I < 2; ++I)
1931 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1932 AArch64::psub0 + I, DL, VT, SuperReg));
1933
1934 CurDAG->RemoveDeadNode(N);
1935}
1936
1937void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1938 unsigned Opcode) {
1939 EVT VT = N->getValueType(0);
1940 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1941 SDValue Ops = createZTuple(Regs);
1942 SDLoc DL(N);
1943 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1944 SDValue SuperReg = SDValue(Intrinsic, 0);
1945 for (unsigned i = 0; i < NumVecs; ++i)
1946 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1947 AArch64::zsub0 + i, DL, VT, SuperReg));
1948
1949 CurDAG->RemoveDeadNode(N);
1950}
1951
1952void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1953 unsigned Opcode) {
1954 SDLoc DL(N);
1955 EVT VT = N->getValueType(0);
1956 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1957 Ops.push_back(/*Chain*/ N->getOperand(0));
1958
1959 SDNode *Instruction =
1960 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1961 SDValue SuperReg = SDValue(Instruction, 0);
1962
1963 for (unsigned i = 0; i < NumVecs; ++i)
1964 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1965 AArch64::zsub0 + i, DL, VT, SuperReg));
1966
1967 // Copy chain
1968 unsigned ChainIdx = NumVecs;
1969 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1970 CurDAG->RemoveDeadNode(N);
1971}
1972
1973void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1974 unsigned NumVecs,
1975 bool IsZmMulti,
1976 unsigned Opcode,
1977 bool HasPred) {
1978 assert(Opcode != 0 && "Unexpected opcode");
1979
1980 SDLoc DL(N);
1981 EVT VT = N->getValueType(0);
1982 unsigned FirstVecIdx = HasPred ? 2 : 1;
1983
1984 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1985 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1986 return createZMulTuple(Regs);
1987 };
1988
1989 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1990
1991 SDValue Zm;
1992 if (IsZmMulti)
1993 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1994 else
1995 Zm = N->getOperand(NumVecs + FirstVecIdx);
1996
1997 SDNode *Intrinsic;
1998 if (HasPred)
1999 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
2000 N->getOperand(1), Zdn, Zm);
2001 else
2002 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
2003 SDValue SuperReg = SDValue(Intrinsic, 0);
2004 for (unsigned i = 0; i < NumVecs; ++i)
2005 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2006 AArch64::zsub0 + i, DL, VT, SuperReg));
2007
2008 CurDAG->RemoveDeadNode(N);
2009}
2010
2011void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2012 unsigned Scale, unsigned Opc_ri,
2013 unsigned Opc_rr, bool IsIntr) {
2014 assert(Scale < 5 && "Invalid scaling value.");
2015 SDLoc DL(N);
2016 EVT VT = N->getValueType(0);
2017 SDValue Chain = N->getOperand(0);
2018
2019 // Optimize addressing mode.
2020 SDValue Base, Offset;
2021 unsigned Opc;
2022 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2023 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2024 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2025
2026 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2027 Base, // Memory operand
2028 Offset, Chain};
2029
2030 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2031
2032 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2033 SDValue SuperReg = SDValue(Load, 0);
2034 for (unsigned i = 0; i < NumVecs; ++i)
2035 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2036 AArch64::zsub0 + i, DL, VT, SuperReg));
2037
2038 // Copy chain
2039 unsigned ChainIdx = NumVecs;
2040 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2041 CurDAG->RemoveDeadNode(N);
2042}
2043
2044void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2045 unsigned NumVecs,
2046 unsigned Scale,
2047 unsigned Opc_ri,
2048 unsigned Opc_rr) {
2049 assert(Scale < 4 && "Invalid scaling value.");
2050 SDLoc DL(N);
2051 EVT VT = N->getValueType(0);
2052 SDValue Chain = N->getOperand(0);
2053
2054 SDValue PNg = N->getOperand(2);
2055 SDValue Base = N->getOperand(3);
2056 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2057 unsigned Opc;
2058 std::tie(Opc, Base, Offset) =
2059 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2060
2061 SDValue Ops[] = {PNg, // Predicate-as-counter
2062 Base, // Memory operand
2063 Offset, Chain};
2064
2065 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2066
2067 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2068 SDValue SuperReg = SDValue(Load, 0);
2069 for (unsigned i = 0; i < NumVecs; ++i)
2070 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2071 AArch64::zsub0 + i, DL, VT, SuperReg));
2072
2073 // Copy chain
2074 unsigned ChainIdx = NumVecs;
2075 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2076 CurDAG->RemoveDeadNode(N);
2077}
2078
2079void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2080 unsigned Opcode) {
2081 if (N->getValueType(0) != MVT::nxv4f32)
2082 return;
2083 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2084}
2085
2086void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2087 unsigned NumOutVecs,
2088 unsigned Opc,
2089 uint32_t MaxImm) {
2090 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2091 if (Imm->getZExtValue() > MaxImm)
2092 return;
2093
2094 SDValue ZtValue;
2095 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2096 return;
2097
2098 SDValue Chain = Node->getOperand(0);
2099 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2100 SDLoc DL(Node);
2101 EVT VT = Node->getValueType(0);
2102
2103 SDNode *Instruction =
2104 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2105 SDValue SuperReg = SDValue(Instruction, 0);
2106
2107 for (unsigned I = 0; I < NumOutVecs; ++I)
2108 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2109 AArch64::zsub0 + I, DL, VT, SuperReg));
2110
2111 // Copy chain
2112 unsigned ChainIdx = NumOutVecs;
2113 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2114 CurDAG->RemoveDeadNode(Node);
2115}
2116
2117void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2118 unsigned NumOutVecs,
2119 unsigned Opc) {
2120 SDValue ZtValue;
2121 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2122 return;
2123
2124 SDValue Chain = Node->getOperand(0);
2125 SDValue Ops[] = {ZtValue,
2126 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2127 Chain};
2128
2129 SDLoc DL(Node);
2130 EVT VT = Node->getValueType(0);
2131
2132 SDNode *Instruction =
2133 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2134 SDValue SuperReg = SDValue(Instruction, 0);
2135
2136 for (unsigned I = 0; I < NumOutVecs; ++I)
2137 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2138 AArch64::zsub0 + I, DL, VT, SuperReg));
2139
2140 // Copy chain
2141 unsigned ChainIdx = NumOutVecs;
2142 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2143 CurDAG->RemoveDeadNode(Node);
2144}
2145
2146void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2147 unsigned Op) {
2148 SDLoc DL(N);
2149 EVT VT = N->getValueType(0);
2150
2151 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2152 SDValue Zd = createZMulTuple(Regs);
2153 SDValue Zn = N->getOperand(1 + NumVecs);
2154 SDValue Zm = N->getOperand(2 + NumVecs);
2155
2156 SDValue Ops[] = {Zd, Zn, Zm};
2157
2158 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2159 SDValue SuperReg = SDValue(Intrinsic, 0);
2160 for (unsigned i = 0; i < NumVecs; ++i)
2161 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2162 AArch64::zsub0 + i, DL, VT, SuperReg));
2163
2164 CurDAG->RemoveDeadNode(N);
2165}
2166
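// For illustration: with BaseReg == AArch64::ZAS0 and TileNum == 2, the helper
// below succeeds and advances BaseReg to the third 32-bit tile; TileNum == 4
// would be rejected since only four 32-bit tiles exist.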
2167bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2168 switch (BaseReg) {
2169 default:
2170 return false;
2171 case AArch64::ZA:
2172 case AArch64::ZAB0:
2173 if (TileNum == 0)
2174 break;
2175 return false;
2176 case AArch64::ZAH0:
2177 if (TileNum <= 1)
2178 break;
2179 return false;
2180 case AArch64::ZAS0:
2181 if (TileNum <= 3)
2182 break;
2183 return false;
2184 case AArch64::ZAD0:
2185 if (TileNum <= 7)
2186 break;
2187 return false;
2188 }
2189
2190 BaseReg += TileNum;
2191 return true;
2192}
2193
2194template <unsigned MaxIdx, unsigned Scale>
2195void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2196 unsigned BaseReg, unsigned Op) {
2197 unsigned TileNum = 0;
2198 if (BaseReg != AArch64::ZA)
2199 TileNum = N->getConstantOperandVal(2);
2200
2201 if (!SelectSMETile(BaseReg, TileNum))
2202 return;
2203
2204 SDValue SliceBase, Base, Offset;
2205 if (BaseReg == AArch64::ZA)
2206 SliceBase = N->getOperand(2);
2207 else
2208 SliceBase = N->getOperand(3);
2209
2210 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2211 return;
2212
2213 SDLoc DL(N);
2214 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2215 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2216 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2217
2218 EVT VT = N->getValueType(0);
2219 for (unsigned I = 0; I < NumVecs; ++I)
2220 ReplaceUses(SDValue(N, I),
2221 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2222 SDValue(Mov, 0)));
2223 // Copy chain
2224 unsigned ChainIdx = NumVecs;
2225 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2226 CurDAG->RemoveDeadNode(N);
2227}
2228
2229void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2230 unsigned Op, unsigned MaxIdx,
2231 unsigned Scale, unsigned BaseReg) {
2232 // The slice operand can be in different positions:
2233 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2234 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2235 SDValue SliceBase = N->getOperand(2);
2236 if (BaseReg != AArch64::ZA)
2237 SliceBase = N->getOperand(3);
2238
2239 SDValue Base, Offset;
2240 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2241 return;
2242 // The correct ZA tile number is computed during machine-instruction
2243 // lowering (see EmitZAInstr); the DAG cannot select a ZA tile as an
2244 // output register with ZReg.
2245 SDLoc DL(N);
2246 SmallVector<SDValue, 6> Ops;
2247 if (BaseReg != AArch64::ZA)
2248 Ops.push_back(N->getOperand(2));
2249 Ops.push_back(Base);
2250 Ops.push_back(Offset);
2251 Ops.push_back(N->getOperand(0)); //Chain
2252 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2253
2254 EVT VT = N->getValueType(0);
2255 for (unsigned I = 0; I < NumVecs; ++I)
2256 ReplaceUses(SDValue(N, I),
2257 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2258 SDValue(Mov, 0)));
2259
2260 // Copy chain
2261 unsigned ChainIdx = NumVecs;
2262 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2263 CurDAG->RemoveDeadNode(N);
2264}
2265
2266void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2267 unsigned NumOutVecs,
2268 bool IsTupleInput,
2269 unsigned Opc) {
2270 SDLoc DL(N);
2271 EVT VT = N->getValueType(0);
2272 unsigned NumInVecs = N->getNumOperands() - 1;
2273
2275 if (IsTupleInput) {
2276 assert((NumInVecs == 2 || NumInVecs == 4) &&
2277 "Don't know how to handle multi-register input!");
2278 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2279 Ops.push_back(createZMulTuple(Regs));
2280 } else {
2281 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2282 for (unsigned I = 0; I < NumInVecs; I++)
2283 Ops.push_back(N->getOperand(1 + I));
2284 }
2285
2286 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2287 SDValue SuperReg = SDValue(Res, 0);
2288
2289 for (unsigned I = 0; I < NumOutVecs; I++)
2290 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2291 AArch64::zsub0 + I, DL, VT, SuperReg));
2292 CurDAG->RemoveDeadNode(N);
2293}
2294
2295void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2296 unsigned Opc) {
2297 SDLoc dl(N);
2298 EVT VT = N->getOperand(2)->getValueType(0);
2299
2300 // Form a REG_SEQUENCE to force register allocation.
2301 bool Is128Bit = VT.getSizeInBits() == 128;
2302 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2303 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2304
2305 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2306 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2307
2308 // Transfer memoperands.
2309 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2310 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2311
2312 ReplaceNode(N, St);
2313}
2314
2315void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2316 unsigned Scale, unsigned Opc_rr,
2317 unsigned Opc_ri) {
2318 SDLoc dl(N);
2319
2320 // Form a REG_SEQUENCE to force register allocation.
2321 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2322 SDValue RegSeq = createZTuple(Regs);
2323
2324 // Optimize addressing mode.
2325 unsigned Opc;
2326 SDValue Base, Offset;
2327 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2328 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2329 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2330
2331 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2332 Base, // address
2333 Offset, // offset
2334 N->getOperand(0)}; // chain
2335 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2336
2337 ReplaceNode(N, St);
2338}
2339
2340bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2341 SDValue &OffImm) {
2342 SDLoc dl(N);
2343 const DataLayout &DL = CurDAG->getDataLayout();
2344 const TargetLowering *TLI = getTargetLowering();
2345
2346 // Try to match it for the frame address
2347 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2348 int FI = FINode->getIndex();
2349 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2350 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2351 return true;
2352 }
2353
2354 return false;
2355}
2356
2357void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2358 unsigned Opc) {
2359 SDLoc dl(N);
2360 EVT VT = N->getOperand(2)->getValueType(0);
2361 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2362 MVT::Other}; // Type for the Chain
2363
2364 // Form a REG_SEQUENCE to force register allocation.
2365 bool Is128Bit = VT.getSizeInBits() == 128;
2366 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2367 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2368
2369 SDValue Ops[] = {RegSeq,
2370 N->getOperand(NumVecs + 1), // base register
2371 N->getOperand(NumVecs + 2), // Incremental
2372 N->getOperand(0)}; // Chain
2373 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2374
2375 ReplaceNode(N, St);
2376}
2377
2378namespace {
2379/// WidenVector - Given a value in the V64 register class, produce the
2380/// equivalent value in the V128 register class.
2381class WidenVector {
2382 SelectionDAG &DAG;
2383
2384public:
2385 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2386
2387 SDValue operator()(SDValue V64Reg) {
2388 EVT VT = V64Reg.getValueType();
2389 unsigned NarrowSize = VT.getVectorNumElements();
2390 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2391 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2392 SDLoc DL(V64Reg);
2393
2394 SDValue Undef =
2395 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2396 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2397 }
2398};
2399} // namespace
2400
2401/// NarrowVector - Given a value in the V128 register class, produce the
2402/// equivalent value in the V64 register class.
2403 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2404 EVT VT = V128Reg.getValueType();
2405 unsigned WideSize = VT.getVectorNumElements();
2406 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2407 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2408
2409 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2410 V128Reg);
2411}
2412
2413void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2414 unsigned Opc) {
2415 SDLoc dl(N);
2416 EVT VT = N->getValueType(0);
2417 bool Narrow = VT.getSizeInBits() == 64;
2418
2419 // Form a REG_SEQUENCE to force register allocation.
2420 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2421
2422 if (Narrow)
2423 transform(Regs, Regs.begin(),
2424 WidenVector(*CurDAG));
2425
2426 SDValue RegSeq = createQTuple(Regs);
2427
2428 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2429
2430 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2431
2432 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2433 N->getOperand(NumVecs + 3), N->getOperand(0)};
2434 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2435 SDValue SuperReg = SDValue(Ld, 0);
2436
2437 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2438 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2439 AArch64::qsub2, AArch64::qsub3 };
2440 for (unsigned i = 0; i < NumVecs; ++i) {
2441 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2442 if (Narrow)
2443 NV = NarrowVector(NV, *CurDAG);
2444 ReplaceUses(SDValue(N, i), NV);
2445 }
2446
2447 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2448 CurDAG->RemoveDeadNode(N);
2449}
2450
2451void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2452 unsigned Opc) {
2453 SDLoc dl(N);
2454 EVT VT = N->getValueType(0);
2455 bool Narrow = VT.getSizeInBits() == 64;
2456
2457 // Form a REG_SEQUENCE to force register allocation.
2458 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2459
2460 if (Narrow)
2461 transform(Regs, Regs.begin(),
2462 WidenVector(*CurDAG));
2463
2464 SDValue RegSeq = createQTuple(Regs);
2465
2466 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2467 RegSeq->getValueType(0), MVT::Other};
2468
2469 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2470
2471 SDValue Ops[] = {RegSeq,
2472 CurDAG->getTargetConstant(LaneNo, dl,
2473 MVT::i64), // Lane Number
2474 N->getOperand(NumVecs + 2), // Base register
2475 N->getOperand(NumVecs + 3), // Incremental
2476 N->getOperand(0)};
2477 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2478
2479 // Update uses of the write back register
2480 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2481
2482 // Update uses of the vector list
2483 SDValue SuperReg = SDValue(Ld, 1);
2484 if (NumVecs == 1) {
2485 ReplaceUses(SDValue(N, 0),
2486 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2487 } else {
2488 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2489 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2490 AArch64::qsub2, AArch64::qsub3 };
2491 for (unsigned i = 0; i < NumVecs; ++i) {
2492 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2493 SuperReg);
2494 if (Narrow)
2495 NV = NarrowVector(NV, *CurDAG);
2496 ReplaceUses(SDValue(N, i), NV);
2497 }
2498 }
2499
2500 // Update the Chain
2501 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2502 CurDAG->RemoveDeadNode(N);
2503}
2504
2505void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2506 unsigned Opc) {
2507 SDLoc dl(N);
2508 EVT VT = N->getOperand(2)->getValueType(0);
2509 bool Narrow = VT.getSizeInBits() == 64;
2510
2511 // Form a REG_SEQUENCE to force register allocation.
2512 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2513
2514 if (Narrow)
2515 transform(Regs, Regs.begin(),
2516 WidenVector(*CurDAG));
2517
2518 SDValue RegSeq = createQTuple(Regs);
2519
2520 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2521
2522 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2523 N->getOperand(NumVecs + 3), N->getOperand(0)};
2524 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2525
2526 // Transfer memoperands.
2527 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2528 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2529
2530 ReplaceNode(N, St);
2531}
2532
2533void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2534 unsigned Opc) {
2535 SDLoc dl(N);
2536 EVT VT = N->getOperand(2)->getValueType(0);
2537 bool Narrow = VT.getSizeInBits() == 64;
2538
2539 // Form a REG_SEQUENCE to force register allocation.
2540 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2541
2542 if (Narrow)
2543 transform(Regs, Regs.begin(),
2544 WidenVector(*CurDAG));
2545
2546 SDValue RegSeq = createQTuple(Regs);
2547
2548 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2549 MVT::Other};
2550
2551 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2552
2553 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2554 N->getOperand(NumVecs + 2), // Base Register
2555 N->getOperand(NumVecs + 3), // Incremental
2556 N->getOperand(0)};
2557 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2558
2559 // Transfer memoperands.
2560 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2561 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2562
2563 ReplaceNode(N, St);
2564}
2565
2566 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2567 unsigned &Opc, SDValue &Opd0,
2568 unsigned &LSB, unsigned &MSB,
2569 unsigned NumberOfIgnoredLowBits,
2570 bool BiggerPattern) {
2571 assert(N->getOpcode() == ISD::AND &&
2572 "N must be a AND operation to call this function");
2573
2574 EVT VT = N->getValueType(0);
2575
2576 // Here we could test the type of VT and return false when the type does not
2577 // match, but since that check is done prior to this call in the current
2578 // context, we turned it into an assert to avoid redundant code.
2579 assert((VT == MVT::i32 || VT == MVT::i64) &&
2580 "Type checking must have been done before calling this function");
2581
2582 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2583 // changed the AND node to a 32-bit mask operation. We'll have to
2584 // undo that as part of the transform here if we want to catch all
2585 // the opportunities.
2586 // Currently the NumberOfIgnoredLowBits argument helps to recover
2587 // from these situations when matching bigger pattern (bitfield insert).
2588
2589 // For unsigned extracts, check for a shift right and mask
2590 uint64_t AndImm = 0;
2591 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2592 return false;
2593
2594 const SDNode *Op0 = N->getOperand(0).getNode();
2595
2596 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2597 // simplified. Try to undo that
2598 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2599
2600 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
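// (e.g. 0x00ff & 0x0100 == 0, so 0x00ff qualifies; 0x0ff0 & 0x0ff1 != 0, so
// 0x0ff0 does not.)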
2601 if (AndImm & (AndImm + 1))
2602 return false;
2603
2604 bool ClampMSB = false;
2605 uint64_t SrlImm = 0;
2606 // Handle the SRL + ANY_EXTEND case.
2607 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2608 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2609 // Extend the incoming operand of the SRL to 64-bit.
2610 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2611 // Make sure to clamp the MSB so that we preserve the semantics of the
2612 // original operations.
2613 ClampMSB = true;
2614 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2615 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2616 SrlImm)) {
2617 // If the shift result was truncated, we can still combine them.
2618 Opd0 = Op0->getOperand(0).getOperand(0);
2619
2620 // Use the type of SRL node.
2621 VT = Opd0->getValueType(0);
2622 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2623 Opd0 = Op0->getOperand(0);
2624 ClampMSB = (VT == MVT::i32);
2625 } else if (BiggerPattern) {
2626 // Let's pretend a 0 shift right has been performed.
2627 // The resulting code will be at least as good as the original one
2628 // plus it may expose more opportunities for bitfield insert pattern.
2629 // FIXME: Currently we limit this to the bigger pattern, because
2630 // some optimizations expect AND and not UBFM.
2631 Opd0 = N->getOperand(0);
2632 } else
2633 return false;
2634
2635 // Bail out on large immediates. This happens when no proper
2636 // combining/constant folding was performed.
2637 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2638 LLVM_DEBUG(
2639 (dbgs() << N
2640 << ": Found large shift immediate, this should not happen\n"));
2641 return false;
2642 }
2643
2644 LSB = SrlImm;
2645 MSB = SrlImm +
2646 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2647 : llvm::countr_one<uint64_t>(AndImm)) -
2648 1;
2649 if (ClampMSB)
2650 // Since we're moving the extend before the right shift operation, we need
2651 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2652 // the zeros which would get shifted in with the original right shift
2653 // operation.
2654 MSB = MSB > 31 ? 31 : MSB;
2655
2656 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2657 return true;
2658}
2659
2660 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2661 SDValue &Opd0, unsigned &Immr,
2662 unsigned &Imms) {
2663 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2664
2665 EVT VT = N->getValueType(0);
2666 unsigned BitWidth = VT.getSizeInBits();
2667 assert((VT == MVT::i32 || VT == MVT::i64) &&
2668 "Type checking must have been done before calling this function");
2669
2670 SDValue Op = N->getOperand(0);
2671 if (Op->getOpcode() == ISD::TRUNCATE) {
2672 Op = Op->getOperand(0);
2673 VT = Op->getValueType(0);
2674 BitWidth = VT.getSizeInBits();
2675 }
2676
2677 uint64_t ShiftImm;
2678 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2679 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2680 return false;
2681
2682 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2683 if (ShiftImm + Width > BitWidth)
2684 return false;
2685
2686 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2687 Opd0 = Op.getOperand(0);
2688 Immr = ShiftImm;
2689 Imms = ShiftImm + Width - 1;
2690 return true;
2691}
2692
2693 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2694 SDValue &Opd0, unsigned &LSB,
2695 unsigned &MSB) {
2696 // We are looking for the following pattern, which extracts several
2697 // contiguous bits from the source value and places them at the LSB of the
2698 // destination value; all other bits of the destination value are set to zero:
2699 //
2700 // Value2 = AND Value, MaskImm
2701 // SRL Value2, ShiftImm
2702 //
2703 // where MaskImm >> ShiftImm is a contiguous mask whose width is the number of bits to extract.
2704 //
2705 // This gets selected into a single UBFM:
2706 //
2707 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2708 //
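// For illustration (i64): with MaskImm == 0xff0 and ShiftImm == 4,
// MaskImm >> ShiftImm == 0xff is a contiguous mask, so the pair becomes
// UBFM Value, #4, #11, i.e. an 8-bit unsigned extract starting at bit 4.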
2709
2710 if (N->getOpcode() != ISD::SRL)
2711 return false;
2712
2713 uint64_t AndMask = 0;
2714 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2715 return false;
2716
2717 Opd0 = N->getOperand(0).getOperand(0);
2718
2719 uint64_t SrlImm = 0;
2720 if (!isIntImmediate(N->getOperand(1), SrlImm))
2721 return false;
2722
2723 // Check whether we really have several bits extract here.
2724 if (!isMask_64(AndMask >> SrlImm))
2725 return false;
2726
2727 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2728 LSB = SrlImm;
2729 MSB = llvm::Log2_64(AndMask);
2730 return true;
2731}
2732
2733static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2734 unsigned &Immr, unsigned &Imms,
2735 bool BiggerPattern) {
2736 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2737 "N must be a SHR/SRA operation to call this function");
2738
2739 EVT VT = N->getValueType(0);
2740
2741 // Here we could test the type of VT and return false when the type does not
2742 // match, but since that check is done prior to this call in the current
2743 // context, we turned it into an assert to avoid redundant code.
2744 assert((VT == MVT::i32 || VT == MVT::i64) &&
2745 "Type checking must have been done before calling this function");
2746
2747 // Check for AND + SRL doing several bits extract.
2748 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2749 return true;
2750
2751 // We're looking for a shift of a shift.
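// For illustration: (srl (shl x, 4), 12) on i32 keeps bits [27:8] of x and is
// selected below as UBFM x, #8, #27 (Immr = 12 - 4, Imms = 32 - 4 - 1).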
2752 uint64_t ShlImm = 0;
2753 uint64_t TruncBits = 0;
2754 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2755 Opd0 = N->getOperand(0).getOperand(0);
2756 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2757 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2758 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2759 // be considered as setting high 32 bits as zero. Our strategy here is to
2760 // always generate 64bit UBFM. This consistency will help the CSE pass
2761 // later find more redundancy.
2762 Opd0 = N->getOperand(0).getOperand(0);
2763 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2764 VT = Opd0.getValueType();
2765 assert(VT == MVT::i64 && "the promoted type should be i64");
2766 } else if (BiggerPattern) {
2767 // Let's pretend a 0 shift left has been performed.
2768 // FIXME: Currently we limit this to the bigger pattern case,
2769 // because some optimizations expect AND and not UBFM
2770 Opd0 = N->getOperand(0);
2771 } else
2772 return false;
2773
2774 // Missing combines/constant folding may have left us with strange
2775 // constants.
2776 if (ShlImm >= VT.getSizeInBits()) {
2777 LLVM_DEBUG(
2778 (dbgs() << N
2779 << ": Found large shift immediate, this should not happen\n"));
2780 return false;
2781 }
2782
2783 uint64_t SrlImm = 0;
2784 if (!isIntImmediate(N->getOperand(1), SrlImm))
2785 return false;
2786
2787 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2788 "bad amount in shift node!");
2789 int immr = SrlImm - ShlImm;
2790 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2791 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2792 // SRA requires a signed extraction
2793 if (VT == MVT::i32)
2794 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2795 else
2796 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2797 return true;
2798}
2799
2800bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2801 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2802
2803 EVT VT = N->getValueType(0);
2804 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2805 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2806 return false;
2807
2808 uint64_t ShiftImm;
2809 SDValue Op = N->getOperand(0);
2810 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2811 return false;
2812
2813 SDLoc dl(N);
2814 // Extend the incoming operand of the shift to 64-bits.
2815 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2816 unsigned Immr = ShiftImm;
2817 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2818 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2819 CurDAG->getTargetConstant(Imms, dl, VT)};
2820 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2821 return true;
2822}
2823
2824static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2825 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2826 unsigned NumberOfIgnoredLowBits = 0,
2827 bool BiggerPattern = false) {
2828 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2829 return false;
2830
2831 switch (N->getOpcode()) {
2832 default:
2833 if (!N->isMachineOpcode())
2834 return false;
2835 break;
2836 case ISD::AND:
2837 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2838 NumberOfIgnoredLowBits, BiggerPattern);
2839 case ISD::SRL:
2840 case ISD::SRA:
2841 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2842
2843 case ISD::SIGN_EXTEND_INREG:
2844 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2845 }
2846
2847 unsigned NOpc = N->getMachineOpcode();
2848 switch (NOpc) {
2849 default:
2850 return false;
2851 case AArch64::SBFMWri:
2852 case AArch64::UBFMWri:
2853 case AArch64::SBFMXri:
2854 case AArch64::UBFMXri:
2855 Opc = NOpc;
2856 Opd0 = N->getOperand(0);
2857 Immr = N->getConstantOperandVal(1);
2858 Imms = N->getConstantOperandVal(2);
2859 return true;
2860 }
2861 // Unreachable
2862 return false;
2863}
2864
2865bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2866 unsigned Opc, Immr, Imms;
2867 SDValue Opd0;
2868 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2869 return false;
2870
2871 EVT VT = N->getValueType(0);
2872 SDLoc dl(N);
2873
2874 // If the bit extract operation is 64bit but the original type is 32bit, we
2875 // need to add one EXTRACT_SUBREG.
2876 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2877 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2878 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2879
2880 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2881 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2882 MVT::i32, SDValue(BFM, 0));
2883 ReplaceNode(N, Inner.getNode());
2884 return true;
2885 }
2886
2887 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2888 CurDAG->getTargetConstant(Imms, dl, VT)};
2889 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2890 return true;
2891}
2892
2893/// Does DstMask form a complementary pair with the mask provided by
2894/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2895/// this asks whether DstMask zeroes precisely those bits that will be set by
2896/// the other half.
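/// For example, for a 32-bit BFI, DstMask == 0xffff00ff pairs with inserted
/// bits that occupy exactly bits [15:8]: the two masks do not overlap and
/// together cover all 32 bits.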
2897static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2898 unsigned NumberOfIgnoredHighBits, EVT VT) {
2899 assert((VT == MVT::i32 || VT == MVT::i64) &&
2900 "i32 or i64 mask type expected!");
2901 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2902
2903 // Enable implicitTrunc as we're intentionally ignoring high bits.
2904 APInt SignificantDstMask =
2905 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2906 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2907
2908 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2909 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2910}
2911
2912// Look for bits that will be useful for later uses.
2913 // A bit is considered useless as soon as it is dropped, provided it was
2914 // never used before being dropped.
2915// E.g., looking for useful bit of x
2916// 1. y = x & 0x7
2917// 2. z = y >> 2
2918 // After #1, the useful bits of x are 0x7; those useful bits of x then live
2919 // through y.
2920// After #2, the useful bits of x are 0x4.
2921// However, if x is used on an unpredictable instruction, then all its bits
2922// are useful.
2923// E.g.
2924// 1. y = x & 0x7
2925// 2. z = y >> 2
2926// 3. str x, [@x]
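// In the second example, the store at #3 consumes the whole of x, so all of
// its bits remain useful despite #1 and #2.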
2927static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2928
2929 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2930 unsigned Depth) {
2931 uint64_t Imm =
2932 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2933 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2934 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2935 getUsefulBits(Op, UsefulBits, Depth + 1);
2936}
2937
2938 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2939 uint64_t Imm, uint64_t MSB,
2940 unsigned Depth) {
2941 // inherit the bitwidth value
2942 APInt OpUsefulBits(UsefulBits);
2943 OpUsefulBits = 1;
2944
2945 if (MSB >= Imm) {
2946 OpUsefulBits <<= MSB - Imm + 1;
2947 --OpUsefulBits;
2948 // The interesting part will be in the lower part of the result
2949 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2950 // The interesting part was starting at Imm in the argument
2951 OpUsefulBits <<= Imm;
2952 } else {
2953 OpUsefulBits <<= MSB + 1;
2954 --OpUsefulBits;
2955 // The interesting part will be shifted in the result
2956 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2957 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2958 // The interesting part was at zero in the argument
2959 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2960 }
2961
2962 UsefulBits &= OpUsefulBits;
2963}
2964
2965static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2966 unsigned Depth) {
2967 uint64_t Imm =
2968 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2969 uint64_t MSB =
2970 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2971
2972 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2973}
2974
2975 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2976 unsigned Depth) {
2977 uint64_t ShiftTypeAndValue =
2978 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2979 APInt Mask(UsefulBits);
2980 Mask.clearAllBits();
2981 Mask.flipAllBits();
2982
2983 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2984 // Shift Left
2985 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2986 Mask <<= ShiftAmt;
2987 getUsefulBits(Op, Mask, Depth + 1);
2988 Mask.lshrInPlace(ShiftAmt);
2989 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2990 // Shift Right
2991 // We do not handle AArch64_AM::ASR, because the sign will change the
2992 // number of useful bits
2993 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2994 Mask.lshrInPlace(ShiftAmt);
2995 getUsefulBits(Op, Mask, Depth + 1);
2996 Mask <<= ShiftAmt;
2997 } else
2998 return;
2999
3000 UsefulBits &= Mask;
3001}
3002
3003static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3004 unsigned Depth) {
3005 uint64_t Imm =
3006 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3007 uint64_t MSB =
3008 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3009
3010 APInt OpUsefulBits(UsefulBits);
3011 OpUsefulBits = 1;
3012
3013 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3014 ResultUsefulBits.flipAllBits();
3015 APInt Mask(UsefulBits.getBitWidth(), 0);
3016
3017 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3018
3019 if (MSB >= Imm) {
3020 // The instruction is a BFXIL.
3021 uint64_t Width = MSB - Imm + 1;
3022 uint64_t LSB = Imm;
3023
3024 OpUsefulBits <<= Width;
3025 --OpUsefulBits;
3026
3027 if (Op.getOperand(1) == Orig) {
3028 // Copy the low bits from the result to bits starting from LSB.
3029 Mask = ResultUsefulBits & OpUsefulBits;
3030 Mask <<= LSB;
3031 }
3032
3033 if (Op.getOperand(0) == Orig)
3034 // Bits starting from LSB in the input contribute to the result.
3035 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3036 } else {
3037 // The instruction is a BFI.
3038 uint64_t Width = MSB + 1;
3039 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3040
3041 OpUsefulBits <<= Width;
3042 --OpUsefulBits;
3043 OpUsefulBits <<= LSB;
3044
3045 if (Op.getOperand(1) == Orig) {
3046 // Copy the bits from the result to the zero bits.
3047 Mask = ResultUsefulBits & OpUsefulBits;
3048 Mask.lshrInPlace(LSB);
3049 }
3050
3051 if (Op.getOperand(0) == Orig)
3052 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3053 }
3054
3055 UsefulBits &= Mask;
3056}
3057
3058static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3059 SDValue Orig, unsigned Depth) {
3060
3061 // Users of this node should have already been instruction selected
3062 // FIXME: Can we turn that into an assert?
3063 if (!UserNode->isMachineOpcode())
3064 return;
3065
3066 switch (UserNode->getMachineOpcode()) {
3067 default:
3068 return;
3069 case AArch64::ANDSWri:
3070 case AArch64::ANDSXri:
3071 case AArch64::ANDWri:
3072 case AArch64::ANDXri:
3073 // We increment Depth only when we call the getUsefulBits
3074 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3075 Depth);
3076 case AArch64::UBFMWri:
3077 case AArch64::UBFMXri:
3078 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3079
3080 case AArch64::ORRWrs:
3081 case AArch64::ORRXrs:
3082 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3083 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3084 Depth);
3085 return;
3086 case AArch64::BFMWri:
3087 case AArch64::BFMXri:
3088 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3089
3090 case AArch64::STRBBui:
3091 case AArch64::STURBBi:
3092 if (UserNode->getOperand(0) != Orig)
3093 return;
3094 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3095 return;
3096
3097 case AArch64::STRHHui:
3098 case AArch64::STURHHi:
3099 if (UserNode->getOperand(0) != Orig)
3100 return;
3101 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3102 return;
3103 }
3104}
3105
3106static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3107 if (Depth >= SelectionDAG::MaxRecursionDepth)
3108 return;
3109 // Initialize UsefulBits
3110 if (!Depth) {
3111 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3112 // At the beginning, assume every produced bits is useful
3113 UsefulBits = APInt(Bitwidth, 0);
3114 UsefulBits.flipAllBits();
3115 }
3116 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3117
3118 for (SDNode *Node : Op.getNode()->users()) {
3119 // A use cannot produce useful bits
3120 APInt UsefulBitsForUse = APInt(UsefulBits);
3121 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3122 UsersUsefulBits |= UsefulBitsForUse;
3123 }
3124 // UsefulBits contains the produced bits that are meaningful for the
3125 // current definition, thus a user cannot make a bit meaningful at
3126 // this point
3127 UsefulBits &= UsersUsefulBits;
3128}
3129
3130/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3131/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3132/// 0, return Op unchanged.
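/// For instance, a notional shift of -4 on an i32 value produces
/// UBFM Op, #4, #31, i.e. LSR Op, #4.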
3133static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3134 if (ShlAmount == 0)
3135 return Op;
3136
3137 EVT VT = Op.getValueType();
3138 SDLoc dl(Op);
3139 unsigned BitWidth = VT.getSizeInBits();
3140 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3141
3142 SDNode *ShiftNode;
3143 if (ShlAmount > 0) {
3144 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3145 ShiftNode = CurDAG->getMachineNode(
3146 UBFMOpc, dl, VT, Op,
3147 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3148 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3149 } else {
3150 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3151 assert(ShlAmount < 0 && "expected right shift");
3152 int ShrAmount = -ShlAmount;
3153 ShiftNode = CurDAG->getMachineNode(
3154 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3155 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3156 }
3157
3158 return SDValue(ShiftNode, 0);
3159}
3160
3161// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3162static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3163 bool BiggerPattern,
3164 const uint64_t NonZeroBits,
3165 SDValue &Src, int &DstLSB,
3166 int &Width);
3167
3168// For bit-field-positioning pattern "shl VAL, N)".
3169static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3170 bool BiggerPattern,
3171 const uint64_t NonZeroBits,
3172 SDValue &Src, int &DstLSB,
3173 int &Width);
3174
3175/// Does this tree qualify as an attempt to move a bitfield into position,
3176/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
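/// For illustration: on i64, (and (shl val, 8), 0xff00) moves bits [7:0] of
/// val into bits [15:8] of the result, giving DstLSB == 8 and Width == 8.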
3177 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3178 bool BiggerPattern, SDValue &Src,
3179 int &DstLSB, int &Width) {
3180 EVT VT = Op.getValueType();
3181 unsigned BitWidth = VT.getSizeInBits();
3182 (void)BitWidth;
3183 assert(BitWidth == 32 || BitWidth == 64);
3184
3185 KnownBits Known = CurDAG->computeKnownBits(Op);
3186
3187 // Non-zero in the sense that they're not provably zero, which is the key
3188 // point if we want to use this value
3189 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3190 if (!isShiftedMask_64(NonZeroBits))
3191 return false;
3192
3193 switch (Op.getOpcode()) {
3194 default:
3195 break;
3196 case ISD::AND:
3197 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3198 NonZeroBits, Src, DstLSB, Width);
3199 case ISD::SHL:
3200 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3201 NonZeroBits, Src, DstLSB, Width);
3202 }
3203
3204 return false;
3205}
3206
3207 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3208 bool BiggerPattern,
3209 const uint64_t NonZeroBits,
3210 SDValue &Src, int &DstLSB,
3211 int &Width) {
3212 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3213
3214 EVT VT = Op.getValueType();
3215 assert((VT == MVT::i32 || VT == MVT::i64) &&
3216 "Caller guarantees VT is one of i32 or i64");
3217 (void)VT;
3218
3219 uint64_t AndImm;
3220 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3221 return false;
3222
3223 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3224 // 1) (AndImm & (1 << POS) == 0)
3225 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3226 //
3227 // 1) and 2) don't agree so something must be wrong (e.g., in
3228 // 'SelectionDAG::computeKnownBits')
3229 assert((~AndImm & NonZeroBits) == 0 &&
3230 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3231
3232 SDValue AndOp0 = Op.getOperand(0);
3233
3234 uint64_t ShlImm;
3235 SDValue ShlOp0;
3236 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3237 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3238 ShlOp0 = AndOp0.getOperand(0);
3239 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3240 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3241 ShlImm)) {
3242 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3243
3244 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3245 SDValue ShlVal = AndOp0.getOperand(0);
3246
3247 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3248 // expect VT to be MVT::i32.
3249 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3250
3251 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3252 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3253 } else
3254 return false;
3255
3256 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3257 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3258 // AndOp0+AND.
3259 if (!BiggerPattern && !AndOp0.hasOneUse())
3260 return false;
3261
3262 DstLSB = llvm::countr_zero(NonZeroBits);
3263 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3264
3265 // Bail out on large Width. This happens when no proper combining / constant
3266 // folding was performed.
3267 if (Width >= (int)VT.getSizeInBits()) {
3268    // If VT is i64, Width > 64 cannot happen since NonZeroBits is uint64_t, and
3269    // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3270    // "val".
3271    // If VT is i32, Width >= 32 means:
3272    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3273    //   demands at least 'Width' bits (after dag-combiner). Together with the
3274    //   `any_extend` Op (undefined higher bits), this indicates a missed
3275    //   combination when lowering the 'and' IR instruction to a machine one.
3276 LLVM_DEBUG(
3277 dbgs()
3278 << "Found large Width in bit-field-positioning -- this indicates no "
3279 "proper combining / constant folding was performed\n");
3280 return false;
3281 }
3282
3283 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3284 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3285 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3286 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3287 // which case it is not profitable to insert an extra shift.
3288 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3289 return false;
3290
3291 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3292 return true;
3293}
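// A worked example of the computation above (illustrative values, i32):
//   Op = (and (shl val, #6), 0x00000fc0), NonZeroBits = 0x00000fc0
//   => DstLSB = countr_zero(0xfc0) = 6, Width = countr_one(0x3f) = 6
//   ShlImm == DstLSB, so Src = val and no extra shift is needed.
//   With BiggerPattern and ShlImm = 8 instead, Src becomes (shl val, #2), i.e.
//   an extra LSL is inserted, which is only worthwhile when matching BFI.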
3294
3295// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3296// UBFIZ.
3297static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3298                                              SDValue &Src, int &DstLSB,
3299 int &Width) {
3300 // Caller should have verified that N is a left shift with constant shift
3301 // amount; asserts that.
3302 assert(Op.getOpcode() == ISD::SHL &&
3303 "Op.getNode() should be a SHL node to call this function");
3304 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3305 "Op.getNode() should shift ShlImm to call this function");
3306
3307 uint64_t AndImm = 0;
3308 SDValue Op0 = Op.getOperand(0);
3309 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3310 return false;
3311
3312 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3313 if (isMask_64(ShiftedAndImm)) {
3314    // Once the bits discarded by the left shift are ignored, AndImm must be a
3315    // contiguous mask of low bits (0..01..1); it may be prefixed with arbitrary
3316    // bits only if those bits are shifted out.
3317    //
3318    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3319    // the AND results corresponding to those bits are shifted out, so it's fine
3320    // not to extract them.
3321 Width = llvm::countr_one(ShiftedAndImm);
3322 DstLSB = ShlImm;
3323 Src = Op0.getOperand(0);
3324 return true;
3325 }
3326 return false;
3327}
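// A worked example (illustrative values): for
//   Op = (shl (and val, 0xff), #4)
// ShiftedAndImm = ((0xff << 4) >> 4) = 0xff is a mask, so Width = 8 and
// DstLSB = 4 with Src = val, i.e. the node acts as a UBFIZ that inserts
// bits [7:0] of val at bit position 4.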
3328
3329static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3330                                           bool BiggerPattern,
3331 const uint64_t NonZeroBits,
3332 SDValue &Src, int &DstLSB,
3333 int &Width) {
3334 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3335
3336 EVT VT = Op.getValueType();
3337 assert((VT == MVT::i32 || VT == MVT::i64) &&
3338 "Caller guarantees that type is i32 or i64");
3339 (void)VT;
3340
3341 uint64_t ShlImm;
3342 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3343 return false;
3344
3345 if (!BiggerPattern && !Op.hasOneUse())
3346 return false;
3347
3348 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3349 return true;
3350
3351 DstLSB = llvm::countr_zero(NonZeroBits);
3352 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3353
3354 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3355 return false;
3356
3357 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3358 return true;
3359}
3360
3361static bool isShiftedMask(uint64_t Mask, EVT VT) {
3362 assert(VT == MVT::i32 || VT == MVT::i64);
3363 if (VT == MVT::i32)
3364 return isShiftedMask_32(Mask);
3365 return isShiftedMask_64(Mask);
3366}
3367
3368// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3369// inserted only sets known zero bits.
3370static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3371  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3372
3373 EVT VT = N->getValueType(0);
3374 if (VT != MVT::i32 && VT != MVT::i64)
3375 return false;
3376
3377 unsigned BitWidth = VT.getSizeInBits();
3378
3379 uint64_t OrImm;
3380 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3381 return false;
3382
3383 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3384 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3385 // performance neutral.
3386  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3387    return false;
3388
3389 uint64_t MaskImm;
3390 SDValue And = N->getOperand(0);
3391 // Must be a single use AND with an immediate operand.
3392 if (!And.hasOneUse() ||
3393 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3394 return false;
3395
3396 // Compute the Known Zero for the AND as this allows us to catch more general
3397 // cases than just looking for AND with imm.
3398 KnownBits Known = CurDAG->computeKnownBits(And);
3399
3400 // Non-zero in the sense that they're not provably zero, which is the key
3401 // point if we want to use this value.
3402 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3403
3404  // The KnownZero mask must be a shifted mask (e.g., 00111..10..0, 11100..00).
3405 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3406 return false;
3407
3408 // The bits being inserted must only set those bits that are known to be zero.
3409 if ((OrImm & NotKnownZero) != 0) {
3410 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3411 // currently handle this case.
3412 return false;
3413 }
3414
3415 // BFI/BFXIL dst, src, #lsb, #width.
3416 int LSB = llvm::countr_one(NotKnownZero);
3417 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3418
3419 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3420 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3421 unsigned ImmS = Width - 1;
3422
3423 // If we're creating a BFI instruction avoid cases where we need more
3424 // instructions to materialize the BFI constant as compared to the original
3425 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3426 // should be no worse in this case.
3427 bool IsBFI = LSB != 0;
3428 uint64_t BFIImm = OrImm >> LSB;
3429 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3430 // We have a BFI instruction and we know the constant can't be materialized
3431 // with a ORR-immediate with the zero register.
3432 unsigned OrChunks = 0, BFIChunks = 0;
3433 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3434 if (((OrImm >> Shift) & 0xFFFF) != 0)
3435 ++OrChunks;
3436 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3437 ++BFIChunks;
3438 }
3439 if (BFIChunks > OrChunks)
3440 return false;
3441 }
3442
3443 // Materialize the constant to be inserted.
3444 SDLoc DL(N);
3445 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3446 SDNode *MOVI = CurDAG->getMachineNode(
3447 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3448
3449 // Create the BFI/BFXIL instruction.
3450 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3451 CurDAG->getTargetConstant(ImmR, DL, VT),
3452 CurDAG->getTargetConstant(ImmS, DL, VT)};
3453 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3454 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3455 return true;
3456}
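// A worked example of the selection above (illustrative values, i32):
//   N = (or (and X, 0xffff00ff), 0x00002a00), Known.Zero(And) = 0x0000ff00
//   NotKnownZero = 0xffff00ff, LSB = countr_one(0xffff00ff) = 8,
//   Width = 32 - popcount(0xffff00ff) = 8, BFIImm = 0x2a00 >> 8 = 0x2a
//   => MOVi32imm #0x2a, then BFMWri X, tmp, ImmR = (32 - 8) % 32 = 24,
//      ImmS = 7, which the disassembler prints as "bfi wD, wTmp, #8, #8".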
3457
3458static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3459                                           SDValue &ShiftedOperand,
3460 uint64_t &EncodedShiftImm) {
3461 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3462 if (!Dst.hasOneUse())
3463 return false;
3464
3465 EVT VT = Dst.getValueType();
3466 assert((VT == MVT::i32 || VT == MVT::i64) &&
3467 "Caller should guarantee that VT is one of i32 or i64");
3468 const unsigned SizeInBits = VT.getSizeInBits();
3469
3470 SDLoc DL(Dst.getNode());
3471 uint64_t AndImm, ShlImm;
3472 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3473 isShiftedMask_64(AndImm)) {
3474 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3475 SDValue DstOp0 = Dst.getOperand(0);
3476 if (!DstOp0.hasOneUse())
3477 return false;
3478
3479 // An example to illustrate the transformation
3480 // From:
3481 // lsr x8, x1, #1
3482 // and x8, x8, #0x3f80
3483 // bfxil x8, x1, #0, #7
3484 // To:
3485    //  and x8, x1, #0x7f
3486    //  ubfx x9, x1, #8, #7
3487    //  orr x8, x8, x9, lsl #7
3488 //
3489 // The number of instructions remains the same, but ORR is faster than BFXIL
3490 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3491 // the dependency chain is improved after the transformation.
3492 uint64_t SrlImm;
3493 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3494 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3495 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3496 unsigned MaskWidth =
3497 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3498 unsigned UBFMOpc =
3499 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3500 SDNode *UBFMNode = CurDAG->getMachineNode(
3501 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3502 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3503 VT),
3504 CurDAG->getTargetConstant(
3505 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3506 ShiftedOperand = SDValue(UBFMNode, 0);
3507 EncodedShiftImm = AArch64_AM::getShifterImm(
3508 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3509 return true;
3510 }
3511 }
3512 return false;
3513 }
3514
3515 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3516 ShiftedOperand = Dst.getOperand(0);
3517 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3518 return true;
3519 }
3520
3521 uint64_t SrlImm;
3522 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3523 ShiftedOperand = Dst.getOperand(0);
3524 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3525 return true;
3526 }
3527 return false;
3528}
3529
3530// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3531// the operands and select it to AArch64::ORR with shifted registers if
3532// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3533static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3534 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3535 const bool BiggerPattern) {
3536 EVT VT = N->getValueType(0);
3537 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3538 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3539 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3540 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3541 assert((VT == MVT::i32 || VT == MVT::i64) &&
3542 "Expect result type to be i32 or i64 since N is combinable to BFM");
3543 SDLoc DL(N);
3544
3545 // Bail out if BFM simplifies away one node in BFM Dst.
3546 if (OrOpd1 != Dst)
3547 return false;
3548
3549 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3550 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3551 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3552 if (BiggerPattern) {
3553 uint64_t SrcAndImm;
3554 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3555 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3556 // OrOpd0 = AND Src, #Mask
3557 // So BFM simplifies away one AND node from Src and doesn't simplify away
3558 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3559 // one node (from Rd), ORR is better since it has higher throughput and
3560 // smaller latency than BFM on many AArch64 processors (and for the rest
3561 // ORR is at least as good as BFM).
3562 SDValue ShiftedOperand;
3563 uint64_t EncodedShiftImm;
3564 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3565 EncodedShiftImm)) {
3566 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3567 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3568 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3569 return true;
3570 }
3571 }
3572 return false;
3573 }
3574
3575 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3576
3577 uint64_t ShlImm;
3578 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3579 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3580 SDValue Ops[] = {
3581 Dst, Src,
3582 CurDAG->getTargetConstant(
3583          AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3584      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3585 return true;
3586 }
3587
3588 // Select the following pattern to left-shifted operand rather than BFI.
3589 // %val1 = op ..
3590 // %val2 = shl %val1, #imm
3591 // %res = or %val1, %val2
3592 //
3593    // If N is selected to be BFI, we know that
3594    // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3595    //    into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3596 //
3597 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3598 if (OrOpd0.getOperand(0) == OrOpd1) {
3599 SDValue Ops[] = {
3600 OrOpd1, OrOpd1,
3601 CurDAG->getTargetConstant(
3602          AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3603      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3604 return true;
3605 }
3606 }
3607
3608 uint64_t SrlImm;
3609 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3610 // Select the following pattern to right-shifted operand rather than BFXIL.
3611 // %val1 = op ..
3612 // %val2 = lshr %val1, #imm
3613 // %res = or %val1, %val2
3614 //
3615    // If N is selected to be BFXIL, we know that
3616    // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3617    //    into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3618 //
3619 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3620 if (OrOpd0.getOperand(0) == OrOpd1) {
3621 SDValue Ops[] = {
3622 OrOpd1, OrOpd1,
3623 CurDAG->getTargetConstant(
3624          AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3625      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3626 return true;
3627 }
3628 }
3629
3630 return false;
3631}
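// A worked example of the !BiggerPattern path above (illustrative IR):
//   %val2 = shl i32 %val1, 3
//   %res  = or  i32 %val1, %val2
// Instead of a BFI this selects "orr wRes, wVal1, wVal1, lsl #3": the same
// instruction count, but ORR-with-shift typically has better latency and
// throughput than BFM.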
3632
3633static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3634 SelectionDAG *CurDAG) {
3635  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3636
3637 EVT VT = N->getValueType(0);
3638 if (VT != MVT::i32 && VT != MVT::i64)
3639 return false;
3640
3641 unsigned BitWidth = VT.getSizeInBits();
3642
3643 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3644 // have the expected shape. Try to undo that.
3645
3646 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3647 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3648
3649  // Given an OR operation, check if we have the following pattern
3650  // ubfm c, b, imm, imm2 (or something that does the same job, see
3651  // isBitfieldExtractOp)
3652  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3653 // countTrailingZeros(mask2) == imm2 - imm + 1
3654 // f = d | c
3655 // if yes, replace the OR instruction with:
3656 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3657
3658 // OR is commutative, check all combinations of operand order and values of
3659 // BiggerPattern, i.e.
3660 // Opd0, Opd1, BiggerPattern=false
3661 // Opd1, Opd0, BiggerPattern=false
3662 // Opd0, Opd1, BiggerPattern=true
3663 // Opd1, Opd0, BiggerPattern=true
3664 // Several of these combinations may match, so check with BiggerPattern=false
3665 // first since that will produce better results by matching more instructions
3666 // and/or inserting fewer extra instructions.
3667 for (int I = 0; I < 4; ++I) {
3668
3669 SDValue Dst, Src;
3670 unsigned ImmR, ImmS;
3671 bool BiggerPattern = I / 2;
3672 SDValue OrOpd0Val = N->getOperand(I % 2);
3673 SDNode *OrOpd0 = OrOpd0Val.getNode();
3674 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3675 SDNode *OrOpd1 = OrOpd1Val.getNode();
3676
3677 unsigned BFXOpc;
3678 int DstLSB, Width;
3679 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3680 NumberOfIgnoredLowBits, BiggerPattern)) {
3681 // Check that the returned opcode is compatible with the pattern,
3682 // i.e., same type and zero extended (U and not S)
3683 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3684 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3685 continue;
3686
3687 // Compute the width of the bitfield insertion
3688 DstLSB = 0;
3689 Width = ImmS - ImmR + 1;
3690      // FIXME: This constraint only catches bitfield insertion; we may want
3691      // to widen the pattern if we want to handle the general bitfield move
3692      // case.
3693 if (Width <= 0)
3694 continue;
3695
3696 // If the mask on the insertee is correct, we have a BFXIL operation. We
3697 // can share the ImmR and ImmS values from the already-computed UBFM.
3698 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3699 BiggerPattern,
3700 Src, DstLSB, Width)) {
3701 ImmR = (BitWidth - DstLSB) % BitWidth;
3702 ImmS = Width - 1;
3703 } else
3704 continue;
3705
3706 // Check the second part of the pattern
3707 EVT VT = OrOpd1Val.getValueType();
3708 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3709
3710 // Compute the Known Zero for the candidate of the first operand.
3711    // This allows us to catch more general cases than just looking for an
3712    // AND with imm. Indeed, simplify-demanded-bits may have removed
3713 // the AND instruction because it proves it was useless.
3714 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3715
3716 // Check if there is enough room for the second operand to appear
3717 // in the first one
3718 APInt BitsToBeInserted =
3719 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3720
3721 if ((BitsToBeInserted & ~Known.Zero) != 0)
3722 continue;
3723
3724 // Set the first operand
3725 uint64_t Imm;
3726 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3727 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3728 // In that case, we can eliminate the AND
3729 Dst = OrOpd1->getOperand(0);
3730 else
3731 // Maybe the AND has been removed by simplify-demanded-bits
3732 // or is useful because it discards more bits
3733 Dst = OrOpd1Val;
3734
3735 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3736 // with shifted operand is more efficient.
3737 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3738 BiggerPattern))
3739 return true;
3740
3741 // both parts match
3742 SDLoc DL(N);
3743 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3744 CurDAG->getTargetConstant(ImmS, DL, VT)};
3745 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3746 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3747 return true;
3748 }
3749
3750 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3751 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3752 // mask (e.g., 0x000ffff0).
3753 uint64_t Mask0Imm, Mask1Imm;
3754 SDValue And0 = N->getOperand(0);
3755 SDValue And1 = N->getOperand(1);
3756 if (And0.hasOneUse() && And1.hasOneUse() &&
3757 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3758 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3759 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3760 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3761
3762 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3763 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3764 // bits to be inserted.
3765 if (isShiftedMask(Mask0Imm, VT)) {
3766 std::swap(And0, And1);
3767 std::swap(Mask0Imm, Mask1Imm);
3768 }
3769
3770 SDValue Src = And1->getOperand(0);
3771 SDValue Dst = And0->getOperand(0);
3772 unsigned LSB = llvm::countr_zero(Mask1Imm);
3773 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3774
3775 // The BFXIL inserts the low-order bits from a source register, so right
3776 // shift the needed bits into place.
3777 SDLoc DL(N);
3778 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3779 uint64_t LsrImm = LSB;
3780 if (Src->hasOneUse() &&
3781 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3782 (LsrImm + LSB) < BitWidth) {
3783 Src = Src->getOperand(0);
3784 LsrImm += LSB;
3785 }
3786
3787 SDNode *LSR = CurDAG->getMachineNode(
3788 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3789 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3790
3791 // BFXIL is an alias of BFM, so translate to BFM operands.
3792 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3793 unsigned ImmS = Width - 1;
3794
3795 // Create the BFXIL instruction.
3796 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3797 CurDAG->getTargetConstant(ImmR, DL, VT),
3798 CurDAG->getTargetConstant(ImmS, DL, VT)};
3799 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3800 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3801 return true;
3802 }
3803
3804 return false;
3805}
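// A worked example of the two-AND case above (illustrative values, i32):
//   N = (or (and X, 0xfffff00f), (and Y, 0x00000ff0))
//   Mask1Imm = 0x00000ff0 is the shifted mask, LSB = 4,
//   Width = 32 - popcount(0xfffff00f) = 8
//   => UBFMWri Y, #4, #31 (lsr #4), then BFMWri X, tmp, ImmR = 28, ImmS = 7,
//      i.e. bits [11:4] of Y are inserted into X at [11:4].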
3806
3807bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3808 if (N->getOpcode() != ISD::OR)
3809 return false;
3810
3811 APInt NUsefulBits;
3812 getUsefulBits(SDValue(N, 0), NUsefulBits);
3813
3814  // If none of the bits are useful, just return UNDEF.
3815 if (!NUsefulBits) {
3816 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3817 return true;
3818 }
3819
3820 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3821 return true;
3822
3823 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3824}
3825
3826/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3827/// equivalent of a left shift by a constant amount followed by an and masking
3828/// out a contiguous set of bits.
3829bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3830 if (N->getOpcode() != ISD::AND)
3831 return false;
3832
3833 EVT VT = N->getValueType(0);
3834 if (VT != MVT::i32 && VT != MVT::i64)
3835 return false;
3836
3837 SDValue Op0;
3838 int DstLSB, Width;
3839 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3840 Op0, DstLSB, Width))
3841 return false;
3842
3843 // ImmR is the rotate right amount.
3844 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3845 // ImmS is the most significant bit of the source to be moved.
3846 unsigned ImmS = Width - 1;
3847
3848 SDLoc DL(N);
3849 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3850 CurDAG->getTargetConstant(ImmS, DL, VT)};
3851 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3852 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3853 return true;
3854}
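// A worked example (illustrative values, i32):
//   N = (and (shl val, #4), 0x00000ff0)
//   DstLSB = 4, Width = 8 => ImmR = (32 - 4) % 32 = 28, ImmS = 7
//   => UBFMWri val, #28, #7, i.e. "ubfiz wD, wVal, #4, #8".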
3855
3856/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3857/// variable shift/rotate instructions.
3858bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3859 EVT VT = N->getValueType(0);
3860
3861 unsigned Opc;
3862 switch (N->getOpcode()) {
3863 case ISD::ROTR:
3864 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3865 break;
3866 case ISD::SHL:
3867 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3868 break;
3869 case ISD::SRL:
3870 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3871 break;
3872 case ISD::SRA:
3873 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3874 break;
3875 default:
3876 return false;
3877 }
3878
3879 uint64_t Size;
3880 uint64_t Bits;
3881 if (VT == MVT::i32) {
3882 Bits = 5;
3883 Size = 32;
3884 } else if (VT == MVT::i64) {
3885 Bits = 6;
3886 Size = 64;
3887 } else
3888 return false;
3889
3890 SDValue ShiftAmt = N->getOperand(1);
3891 SDLoc DL(N);
3892 SDValue NewShiftAmt;
3893
3894 // Skip over an extend of the shift amount.
3895 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3896 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3897 ShiftAmt = ShiftAmt->getOperand(0);
3898
3899 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3900 SDValue Add0 = ShiftAmt->getOperand(0);
3901 SDValue Add1 = ShiftAmt->getOperand(1);
3902 uint64_t Add0Imm;
3903 uint64_t Add1Imm;
3904 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3905 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3906 // to avoid the ADD/SUB.
3907 NewShiftAmt = Add0;
3908 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3909 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3910 (Add0Imm % Size == 0)) {
3911 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3912 // to generate a NEG instead of a SUB from a constant.
3913 unsigned NegOpc;
3914 unsigned ZeroReg;
3915 EVT SubVT = ShiftAmt->getValueType(0);
3916 if (SubVT == MVT::i32) {
3917 NegOpc = AArch64::SUBWrr;
3918 ZeroReg = AArch64::WZR;
3919 } else {
3920 assert(SubVT == MVT::i64);
3921 NegOpc = AArch64::SUBXrr;
3922 ZeroReg = AArch64::XZR;
3923 }
3924 SDValue Zero =
3925 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3926 MachineSDNode *Neg =
3927 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3928 NewShiftAmt = SDValue(Neg, 0);
3929 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3930 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3931 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3932 // to generate a NOT instead of a SUB from a constant.
3933 unsigned NotOpc;
3934 unsigned ZeroReg;
3935 EVT SubVT = ShiftAmt->getValueType(0);
3936 if (SubVT == MVT::i32) {
3937 NotOpc = AArch64::ORNWrr;
3938 ZeroReg = AArch64::WZR;
3939 } else {
3940 assert(SubVT == MVT::i64);
3941 NotOpc = AArch64::ORNXrr;
3942 ZeroReg = AArch64::XZR;
3943 }
3944 SDValue Zero =
3945 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3946 MachineSDNode *Not =
3947 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3948 NewShiftAmt = SDValue(Not, 0);
3949 } else
3950 return false;
3951 } else {
3952 // If the shift amount is masked with an AND, check that the mask covers the
3953 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3954 // the AND.
3955 uint64_t MaskImm;
3956 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3957 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3958 return false;
3959
3960 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3961 return false;
3962
3963 NewShiftAmt = ShiftAmt->getOperand(0);
3964 }
3965
3966 // Narrow/widen the shift amount to match the size of the shift operation.
3967 if (VT == MVT::i32)
3968 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3969 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3970 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3971 MachineSDNode *Ext = CurDAG->getMachineNode(
3972 AArch64::SUBREG_TO_REG, DL, VT,
3973 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3974 NewShiftAmt = SDValue(Ext, 0);
3975 }
3976
3977 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3978 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3979 return true;
3980}
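// Worked examples of the rewrites above (illustrative i64 shifts; the shifted
// value is x and the variable amount is amt):
//   shl x, (add amt, 64)  -->  lsl x, x, amt            ; 64 % 64 == 0
//   shl x, (sub 64, amt)  -->  neg t, amt;  lsl x, x, t
//   shl x, (sub 63, amt)  -->  mvn t, amt;  lsl x, x, t ; 63 == Size - 1
//   shl x, (and amt, 63)  -->  lsl x, x, amt            ; mask covers low 6 bits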
3981
3982static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3983                                               SDValue &FixedPos,
3984 unsigned RegWidth,
3985 bool isReciprocal) {
3986 APFloat FVal(0.0);
3987  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3988    FVal = CN->getValueAPF();
3989 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3990 // Some otherwise illegal constants are allowed in this case.
3991 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3992 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3993 return false;
3994
3995 ConstantPoolSDNode *CN =
3996 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3997 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3998 } else
3999 return false;
4000
4001 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
4002 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4003 // x-register.
4004 //
4005 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4006 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4007 // integers.
4008 bool IsExact;
4009
4010 if (isReciprocal)
4011 if (!FVal.getExactInverse(&FVal))
4012 return false;
4013
4014 // fbits is between 1 and 64 in the worst-case, which means the fmul
4015 // could have 2^64 as an actual operand. Need 65 bits of precision.
4016 APSInt IntVal(65, true);
4017 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4018
4019 // N.b. isPowerOf2 also checks for > 0.
4020 if (!IsExact || !IntVal.isPowerOf2())
4021 return false;
4022 unsigned FBits = IntVal.logBase2();
4023
4024 // Checks above should have guaranteed that we haven't lost information in
4025 // finding FBits, but it must still be in range.
4026 if (FBits == 0 || FBits > RegWidth) return false;
4027
4028 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4029 return true;
4030}
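// A worked example (illustrative IR): for
//   %s = fmul float %x, 16.0
//   %i = fptosi float %s to i32
// 16.0 == 2^4 converts exactly to the integer 16, so FBits = 4 and the pair
// can be selected as a fixed-point conversion such as "fcvtzs w0, s0, #4".
// With isReciprocal, a constant like 0.0625 is first inverted to 16.0 and
// yields the same FBits.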
4031
4032bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4033 unsigned RegWidth) {
4034 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4035 false);
4036}
4037
4038bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4039 SDValue &FixedPos,
4040 unsigned RegWidth) {
4041 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4042 true);
4043}
4044
4045// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4046// integer values of its fields, and combines them into a single value to be
4047// used in the MRS/MSR instruction.
4048static int getIntOperandFromRegisterString(StringRef RegString) {
4049  SmallVector<StringRef, 5> Fields;
4050  RegString.split(Fields, ':');
4051
4052 if (Fields.size() == 1)
4053 return -1;
4054
4055 assert(Fields.size() == 5
4056 && "Invalid number of fields in read register string");
4057
4058  SmallVector<unsigned, 5> Ops;
4059  bool AllIntFields = true;
4060
4061 for (StringRef Field : Fields) {
4062 unsigned IntField;
4063 AllIntFields &= !Field.getAsInteger(10, IntField);
4064 Ops.push_back(IntField);
4065 }
4066
4067 assert(AllIntFields &&
4068 "Unexpected non-integer value in special register string.");
4069 (void)AllIntFields;
4070
4071 // Need to combine the integer fields of the string into a single value
4072 // based on the bit encoding of MRS/MSR instruction.
4073 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4074 (Ops[3] << 3) | (Ops[4]);
4075}
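// A worked example (illustrative field values, not a specific register):
//   "1:2:7:14:0" -> (1 << 14) | (2 << 11) | (7 << 7) | (14 << 3) | 0
//                 = 16384 + 4096 + 896 + 112 = 21488 (0x53f0)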
4076
4077// Lower the read_register intrinsic to an MRS instruction node if the special
4078// register string argument is either of the form detailed in the ALCE (the
4079// form described in getIntOperandFromRegisterString) or is a named register
4080// known by the MRS SysReg mapper.
4081bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4082 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4083 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4084 SDLoc DL(N);
4085
4086 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4087
4088 unsigned Opcode64Bit = AArch64::MRS;
4089 int Imm = getIntOperandFromRegisterString(RegString->getString());
4090 if (Imm == -1) {
4091    // No match. Use the sysreg mapper to map the remaining possible strings to
4092 // the value for the register to be used for the instruction operand.
4093 const auto *TheReg =
4094 AArch64SysReg::lookupSysRegByName(RegString->getString());
4095 if (TheReg && TheReg->Readable &&
4096 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4097 Imm = TheReg->Encoding;
4098 else
4099 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4100
4101 if (Imm == -1) {
4102 // Still no match, see if this is "pc" or give up.
4103 if (!ReadIs128Bit && RegString->getString() == "pc") {
4104 Opcode64Bit = AArch64::ADR;
4105 Imm = 0;
4106 } else {
4107 return false;
4108 }
4109 }
4110 }
4111
4112 SDValue InChain = N->getOperand(0);
4113 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4114 if (!ReadIs128Bit) {
4115 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4116 {SysRegImm, InChain});
4117 } else {
4118 SDNode *MRRS = CurDAG->getMachineNode(
4119 AArch64::MRRS, DL,
4120 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4121 {SysRegImm, InChain});
4122
4123 // Sysregs are not endian. The even register always contains the low half
4124 // of the register.
4125 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4126 SDValue(MRRS, 0));
4127 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4128 SDValue(MRRS, 0));
4129 SDValue OutChain = SDValue(MRRS, 1);
4130
4131 ReplaceUses(SDValue(N, 0), Lo);
4132 ReplaceUses(SDValue(N, 1), Hi);
4133 ReplaceUses(SDValue(N, 2), OutChain);
4134 };
4135 return true;
4136}
4137
4138// Lower the write_register intrinsic to an MSR instruction node if the special
4139// register string argument is either of the form detailed in the ALCE (the
4140// form described in getIntOperandFromRegisterString) or is a named register
4141// known by the MSR SysReg mapper.
4142bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4143 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4144 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4145 SDLoc DL(N);
4146
4147 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4148
4149 if (!WriteIs128Bit) {
4150 // Check if the register was one of those allowed as the pstatefield value
4151 // in the MSR (immediate) instruction. To accept the values allowed in the
4152 // pstatefield for the MSR (immediate) instruction, we also require that an
4153    // immediate value has been provided as an argument; we know that this is
4154    // the case, as it has been ensured by semantic checking.
4155 auto trySelectPState = [&](auto PMapper, unsigned State) {
4156 if (PMapper) {
4157 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4158 "Expected a constant integer expression.");
4159 unsigned Reg = PMapper->Encoding;
4160 uint64_t Immed = N->getConstantOperandVal(2);
4161 CurDAG->SelectNodeTo(
4162 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4163 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4164 return true;
4165 }
4166 return false;
4167 };
4168
4169 if (trySelectPState(
4170 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4171 AArch64::MSRpstateImm4))
4172 return true;
4173 if (trySelectPState(
4174 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4175 AArch64::MSRpstateImm1))
4176 return true;
4177 }
4178
4179 int Imm = getIntOperandFromRegisterString(RegString->getString());
4180 if (Imm == -1) {
4181 // Use the sysreg mapper to attempt to map the remaining possible strings
4182 // to the value for the register to be used for the MSR (register)
4183 // instruction operand.
4184 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4185 if (TheReg && TheReg->Writeable &&
4186 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4187 Imm = TheReg->Encoding;
4188 else
4189 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4190
4191 if (Imm == -1)
4192 return false;
4193 }
4194
4195 SDValue InChain = N->getOperand(0);
4196 if (!WriteIs128Bit) {
4197 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4198 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4199 N->getOperand(2), InChain);
4200 } else {
4201 // No endian swap. The lower half always goes into the even subreg, and the
4202    // higher half always into the odd subreg.
4203 SDNode *Pair = CurDAG->getMachineNode(
4204 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4205 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4206 MVT::i32),
4207 N->getOperand(2),
4208 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4209 N->getOperand(3),
4210 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4211
4212 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4213 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4214 SDValue(Pair, 0), InChain);
4215 }
4216
4217 return true;
4218}
4219
4220/// We've got special pseudo-instructions for these compare-and-swap operations.
4221bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4222 unsigned Opcode;
4223 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4224
4225 // Leave IR for LSE if subtarget supports it.
4226 if (Subtarget->hasLSE()) return false;
4227
4228 if (MemTy == MVT::i8)
4229 Opcode = AArch64::CMP_SWAP_8;
4230 else if (MemTy == MVT::i16)
4231 Opcode = AArch64::CMP_SWAP_16;
4232 else if (MemTy == MVT::i32)
4233 Opcode = AArch64::CMP_SWAP_32;
4234 else if (MemTy == MVT::i64)
4235 Opcode = AArch64::CMP_SWAP_64;
4236 else
4237 llvm_unreachable("Unknown AtomicCmpSwap type");
4238
4239 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4240 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4241 N->getOperand(0)};
4242 SDNode *CmpSwap = CurDAG->getMachineNode(
4243 Opcode, SDLoc(N),
4244 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4245
4246 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4247 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4248
4249 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4250 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4251 CurDAG->RemoveDeadNode(N);
4252
4253 return true;
4254}
4255
4256bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4257 SDValue &Shift, bool Negate) {
4258 if (!isa<ConstantSDNode>(N))
4259 return false;
4260
4261 SDLoc DL(N);
4262 APInt Val =
4263 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4264
4265 if (Negate)
4266 Val = -Val;
4267
4268 switch (VT.SimpleTy) {
4269 case MVT::i8:
4270 // All immediates are supported.
4271 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4272 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4273 return true;
4274 case MVT::i16:
4275 case MVT::i32:
4276 case MVT::i64:
4277 // Support 8bit unsigned immediates.
4278 if ((Val & ~0xff) == 0) {
4279 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4280 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4281 return true;
4282 }
4283 // Support 16bit unsigned immediates that are a multiple of 256.
4284 if ((Val & ~0xff00) == 0) {
4285 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4286 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4287 return true;
4288 }
4289 break;
4290 default:
4291 break;
4292 }
4293
4294 return false;
4295}
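// Worked examples for the i16/i32/i64 cases above (illustrative values,
// Negate = false):
//   Val = 0x0012 -> Imm = 0x12, Shift = 0
//   Val = 0x1200 -> Imm = 0x12, Shift = 8   (16-bit multiple of 256)
//   Val = 0x1234 -> no match; the immediate form cannot be used.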
4296
4297bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4298 SDValue &Imm, SDValue &Shift,
4299 bool Negate) {
4300 if (!isa<ConstantSDNode>(N))
4301 return false;
4302
4303 SDLoc DL(N);
4304 int64_t Val = cast<ConstantSDNode>(N)
4305 ->getAPIntValue()
4306                    .trunc(VT.getFixedSizeInBits())
4307                    .getSExtValue();
4308
4309 if (Negate)
4310 Val = -Val;
4311
4312 // Signed saturating instructions treat their immediate operand as unsigned,
4313 // whereas the related intrinsics define their operands to be signed. This
4314 // means we can only use the immediate form when the operand is non-negative.
4315 if (Val < 0)
4316 return false;
4317
4318 switch (VT.SimpleTy) {
4319 case MVT::i8:
4320 // All positive immediates are supported.
4321 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4322 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4323 return true;
4324 case MVT::i16:
4325 case MVT::i32:
4326 case MVT::i64:
4327 // Support 8bit positive immediates.
4328 if (Val <= 255) {
4329 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4330 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4331 return true;
4332 }
4333 // Support 16bit positive immediates that are a multiple of 256.
4334 if (Val <= 65280 && Val % 256 == 0) {
4335 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4336 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4337 return true;
4338 }
4339 break;
4340 default:
4341 break;
4342 }
4343
4344 return false;
4345}
4346
4347bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4348 SDValue &Shift) {
4349 if (!isa<ConstantSDNode>(N))
4350 return false;
4351
4352 SDLoc DL(N);
4353 int64_t Val = cast<ConstantSDNode>(N)
4354 ->getAPIntValue()
4355 .trunc(VT.getFixedSizeInBits())
4356 .getSExtValue();
4357 int32_t ImmVal, ShiftVal;
4358 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4359 ShiftVal))
4360 return false;
4361
4362 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4363 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4364 return true;
4365}
4366
4367bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4368 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4369 int64_t ImmVal = CNode->getSExtValue();
4370 SDLoc DL(N);
4371 if (ImmVal >= -128 && ImmVal < 128) {
4372 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4373 return true;
4374 }
4375 }
4376 return false;
4377}
4378
4379bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4380 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4381 uint64_t ImmVal = CNode->getZExtValue();
4382
4383 switch (VT.SimpleTy) {
4384 case MVT::i8:
4385 ImmVal &= 0xFF;
4386 break;
4387 case MVT::i16:
4388 ImmVal &= 0xFFFF;
4389 break;
4390 case MVT::i32:
4391 ImmVal &= 0xFFFFFFFF;
4392 break;
4393 case MVT::i64:
4394 break;
4395 default:
4396 llvm_unreachable("Unexpected type");
4397 }
4398
4399 if (ImmVal < 256) {
4400 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4401 return true;
4402 }
4403 }
4404 return false;
4405}
4406
4407bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4408 bool Invert) {
4409 uint64_t ImmVal;
4410 if (auto CI = dyn_cast<ConstantSDNode>(N))
4411 ImmVal = CI->getZExtValue();
4412 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4413 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4414 else
4415 return false;
4416
4417 if (Invert)
4418 ImmVal = ~ImmVal;
4419
4420 uint64_t encoding;
4421 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4422 return false;
4423
4424 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4425 return true;
4426}
4427
4428// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4429// Rather than attempt to normalise everything we can sometimes saturate the
4430// shift amount during selection. This function also allows for consistent
4431// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4432// required by the instructions.
4433bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4434 uint64_t High, bool AllowSaturation,
4435 SDValue &Imm) {
4436 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4437 uint64_t ImmVal = CN->getZExtValue();
4438
4439 // Reject shift amounts that are too small.
4440 if (ImmVal < Low)
4441 return false;
4442
4443 // Reject or saturate shift amounts that are too big.
4444 if (ImmVal > High) {
4445 if (!AllowSaturation)
4446 return false;
4447 ImmVal = High;
4448 }
4449
4450 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4451 return true;
4452 }
4453
4454 return false;
4455}
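// A worked example (illustrative values): with Low = 1 and High = 8 (byte
// elements), a shift amount of 200 is rejected unless AllowSaturation is set,
// in which case it is clamped to 8; an amount of 0 is always rejected.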
4456
4457bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4458 // tagp(FrameIndex, IRGstack, tag_offset):
4459 // since the offset between FrameIndex and IRGstack is a compile-time
4460 // constant, this can be lowered to a single ADDG instruction.
4461 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4462 return false;
4463 }
4464
4465 SDValue IRG_SP = N->getOperand(2);
4466 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4467 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4468 return false;
4469 }
4470
4471 const TargetLowering *TLI = getTargetLowering();
4472 SDLoc DL(N);
4473 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4474 SDValue FiOp = CurDAG->getTargetFrameIndex(
4475 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4476 int TagOffset = N->getConstantOperandVal(3);
4477
4478 SDNode *Out = CurDAG->getMachineNode(
4479 AArch64::TAGPstack, DL, MVT::i64,
4480 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4481 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4482 ReplaceNode(N, Out);
4483 return true;
4484}
4485
4486void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4487 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4488 "llvm.aarch64.tagp third argument must be an immediate");
4489 if (trySelectStackSlotTagP(N))
4490 return;
4491 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4492 // compile-time constant, not just for stack allocations.
4493
4494 // General case for unrelated pointers in Op1 and Op2.
4495 SDLoc DL(N);
4496 int TagOffset = N->getConstantOperandVal(3);
4497 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4498 {N->getOperand(1), N->getOperand(2)});
4499 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4500 {SDValue(N1, 0), N->getOperand(2)});
4501 SDNode *N3 = CurDAG->getMachineNode(
4502 AArch64::ADDG, DL, MVT::i64,
4503 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4504 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4505 ReplaceNode(N, N3);
4506}
4507
4508bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4509 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4510
4511 // Bail when not a "cast" like insert_subvector.
4512 if (N->getConstantOperandVal(2) != 0)
4513 return false;
4514 if (!N->getOperand(0).isUndef())
4515 return false;
4516
4517 // Bail when normal isel should do the job.
4518 EVT VT = N->getValueType(0);
4519 EVT InVT = N->getOperand(1).getValueType();
4520 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4521 return false;
4522 if (InVT.getSizeInBits() <= 128)
4523 return false;
4524
4525 // NOTE: We can only get here when doing fixed length SVE code generation.
4526 // We do manual selection because the types involved are not linked to real
4527 // registers (despite being legal) and must be coerced into SVE registers.
4528
4530 "Expected to insert into a packed scalable vector!");
4531
4532 SDLoc DL(N);
4533 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4534 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4535 N->getOperand(1), RC));
4536 return true;
4537}
4538
4539bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4540 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4541
4542 // Bail when not a "cast" like extract_subvector.
4543 if (N->getConstantOperandVal(1) != 0)
4544 return false;
4545
4546 // Bail when normal isel can do the job.
4547 EVT VT = N->getValueType(0);
4548 EVT InVT = N->getOperand(0).getValueType();
4549 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4550 return false;
4551 if (VT.getSizeInBits() <= 128)
4552 return false;
4553
4554 // NOTE: We can only get here when doing fixed length SVE code generation.
4555 // We do manual selection because the types involved are not linked to real
4556 // registers (despite being legal) and must be coerced into SVE registers.
4557
4559 "Expected to extract from a packed scalable vector!");
4560
4561 SDLoc DL(N);
4562 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4563 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4564 N->getOperand(0), RC));
4565 return true;
4566}
4567
4568bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4569 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4570
4571 SDValue N0 = N->getOperand(0);
4572 SDValue N1 = N->getOperand(1);
4573
4574 EVT VT = N->getValueType(0);
4575 SDLoc DL(N);
4576
4577 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4578  // Rotate by a constant is a funnel shift in IR which is expanded to
4579 // an OR with shifted operands.
4580 // We do the following transform:
4581 // OR N0, N1 -> xar (x, y, imm)
4582 // Where:
4583 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4584 // N0 = SHL_PRED true, V, splat(bits-imm)
4585 // V = (xor x, y)
4586 if (VT.isScalableVector() &&
4587 (Subtarget->hasSVE2() ||
4588 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4589 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4590 N1.getOpcode() != AArch64ISD::SRL_PRED)
4591 std::swap(N0, N1);
4592 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4593 N1.getOpcode() != AArch64ISD::SRL_PRED)
4594 return false;
4595
4596 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4597 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4598 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4599 return false;
4600
4601 if (N0.getOperand(1) != N1.getOperand(1))
4602 return false;
4603
4604 SDValue R1, R2;
4605 bool IsXOROperand = true;
4606 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4607 IsXOROperand = false;
4608 } else {
4609 R1 = N0.getOperand(1).getOperand(0);
4610 R2 = N1.getOperand(1).getOperand(1);
4611 }
4612
4613 APInt ShlAmt, ShrAmt;
4614 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4615        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4616      return false;
4617
4618 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4619 return false;
4620
4621 if (!IsXOROperand) {
4622 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4623 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4624 SDValue MOVIV = SDValue(MOV, 0);
4625
4626 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4627 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4628 VT, Zero, MOVIV, ZSub);
4629
4630 R1 = N1->getOperand(1);
4631 R2 = SDValue(SubRegToReg, 0);
4632 }
4633
4634 SDValue Imm =
4635 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4636
4637 SDValue Ops[] = {R1, R2, Imm};
4638    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4639            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4640 AArch64::XAR_ZZZI_D})) {
4641 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4642 return true;
4643 }
4644 return false;
4645 }
4646
4647  // We have the Neon SHA3 XAR operation for v2i64, but for the types
4648  // v4i32, v8i16 and v16i8 we can use the SVE XAR operation when SVE2
4649  // is available.
4650 EVT SVT;
4651 switch (VT.getSimpleVT().SimpleTy) {
4652 case MVT::v4i32:
4653 case MVT::v2i32:
4654 SVT = MVT::nxv4i32;
4655 break;
4656 case MVT::v8i16:
4657 case MVT::v4i16:
4658 SVT = MVT::nxv8i16;
4659 break;
4660 case MVT::v16i8:
4661 case MVT::v8i8:
4662 SVT = MVT::nxv16i8;
4663 break;
4664 case MVT::v2i64:
4665 case MVT::v1i64:
4666 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4667 break;
4668 default:
4669 return false;
4670 }
4671
4672 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4673 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4674 return false;
4675
4676 if (N0->getOpcode() != AArch64ISD::VSHL ||
4677 N1->getOpcode() != AArch64ISD::VLSHR)
4678 return false;
4679
4680 if (N0->getOperand(0) != N1->getOperand(0))
4681 return false;
4682
4683 SDValue R1, R2;
4684 bool IsXOROperand = true;
4685 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4686 IsXOROperand = false;
4687 } else {
4688 SDValue XOR = N0.getOperand(0);
4689 R1 = XOR.getOperand(0);
4690 R2 = XOR.getOperand(1);
4691 }
4692
4693 unsigned HsAmt = N0.getConstantOperandVal(1);
4694 unsigned ShAmt = N1.getConstantOperandVal(1);
4695
4696 SDValue Imm = CurDAG->getTargetConstant(
4697 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4698
4699 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4700 if (ShAmt + HsAmt != VTSizeInBits)
4701 return false;
4702
4703 if (!IsXOROperand) {
4704 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4705 SDNode *MOV =
4706 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4707 SDValue MOVIV = SDValue(MOV, 0);
4708
4709 R1 = N1->getOperand(0);
4710 R2 = MOVIV;
4711 }
4712
4713 if (SVT != VT) {
4714 SDValue Undef =
4715 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4716
4717 if (SVT.isScalableVector() && VT.is64BitVector()) {
4718 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4719
4720 SDValue UndefQ = SDValue(
4721 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4722 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4723
4724 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4725 UndefQ, R1, DSub),
4726 0);
4727 if (R2.getValueType() == VT)
4728 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4729 UndefQ, R2, DSub),
4730 0);
4731 }
4732
4733 SDValue SubReg = CurDAG->getTargetConstant(
4734 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4735
4736 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4737 R1, SubReg),
4738 0);
4739
4740 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4741 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4742 Undef, R2, SubReg),
4743 0);
4744 }
4745
4746 SDValue Ops[] = {R1, R2, Imm};
4747 SDNode *XAR = nullptr;
4748
4749 if (SVT.isScalableVector()) {
4750    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4751            SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4752 AArch64::XAR_ZZZI_D}))
4753 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4754 } else {
4755 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4756 }
4757
4758 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4759
4760 if (SVT != VT) {
4761 if (VT.is64BitVector() && SVT.isScalableVector()) {
4762 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4763
4764 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4765 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4766 SDValue(XAR, 0), ZSub);
4767
4768 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4769 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4770 SDValue(Q, 0), DSub);
4771 } else {
4772 SDValue SubReg = CurDAG->getTargetConstant(
4773 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4774 MVT::i32);
4775 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4776 SDValue(XAR, 0), SubReg);
4777 }
4778 }
4779 ReplaceNode(N, XAR);
4780 return true;
4781}
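// A worked example of the Neon SHA3 case above (illustrative nodes, v2i64):
//   %x   = xor %a, %b
//   N0   = AArch64ISD::VSHL  %x, #56
//   N1   = AArch64ISD::VLSHR %x, #8
//   N    = or N0, N1                       ; rotr(%x, 8)
// With +sha3 this selects "xar v0.2d, vA.2d, vB.2d, #8" (ShAmt = 8,
// HsAmt = 56, ShAmt + HsAmt == 64).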
4782
4783void AArch64DAGToDAGISel::Select(SDNode *Node) {
4784 // If we have a custom node, we already have selected!
4785 if (Node->isMachineOpcode()) {
4786 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4787 Node->setNodeId(-1);
4788 return;
4789 }
4790
4791  // A few cases need custom selection.
4792 EVT VT = Node->getValueType(0);
4793
4794 switch (Node->getOpcode()) {
4795 default:
4796 break;
4797
4798  case ISD::ATOMIC_CMP_SWAP:
4799    if (SelectCMP_SWAP(Node))
4800 return;
4801 break;
4802
4803 case ISD::READ_REGISTER:
4804 case AArch64ISD::MRRS:
4805 if (tryReadRegister(Node))
4806 return;
4807 break;
4808
4809  case ISD::WRITE_REGISTER:
4810  case AArch64ISD::MSRR:
4811 if (tryWriteRegister(Node))
4812 return;
4813 break;
4814
4815 case ISD::LOAD: {
4816 // Try to select as an indexed load. Fall through to normal processing
4817 // if we can't.
4818 if (tryIndexedLoad(Node))
4819 return;
4820 break;
4821 }
4822
4823 case ISD::SRL:
4824 case ISD::AND:
4825 case ISD::SRA:
4826  case ISD::SIGN_EXTEND_INREG:
4827    if (tryBitfieldExtractOp(Node))
4828 return;
4829 if (tryBitfieldInsertInZeroOp(Node))
4830 return;
4831 [[fallthrough]];
4832 case ISD::ROTR:
4833 case ISD::SHL:
4834 if (tryShiftAmountMod(Node))
4835 return;
4836 break;
4837
4838 case ISD::SIGN_EXTEND:
4839 if (tryBitfieldExtractOpFromSExt(Node))
4840 return;
4841 break;
4842
4843 case ISD::OR:
4844 if (tryBitfieldInsertOp(Node))
4845 return;
4846 if (trySelectXAR(Node))
4847 return;
4848 break;
4849
4850  case ISD::EXTRACT_SUBVECTOR: {
4851    if (trySelectCastScalableToFixedLengthVector(Node))
4852 return;
4853 break;
4854 }
4855
4856 case ISD::INSERT_SUBVECTOR: {
4857 if (trySelectCastFixedLengthToScalableVector(Node))
4858 return;
4859 break;
4860 }
4861
4862 case ISD::Constant: {
4863 // Materialize zero constants as copies from WZR/XZR. This allows
4864 // the coalescer to propagate these into other instructions.
4865 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4866 if (ConstNode->isZero()) {
4867 if (VT == MVT::i32) {
4868 SDValue New = CurDAG->getCopyFromReg(
4869 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4870 ReplaceNode(Node, New.getNode());
4871 return;
4872 } else if (VT == MVT::i64) {
4873 SDValue New = CurDAG->getCopyFromReg(
4874 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4875 ReplaceNode(Node, New.getNode());
4876 return;
4877 }
4878 }
4879 break;
4880 }
4881
4882 case ISD::FrameIndex: {
4883 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4884 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4885 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4886 const TargetLowering *TLI = getTargetLowering();
4887 SDValue TFI = CurDAG->getTargetFrameIndex(
4888 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4889 SDLoc DL(Node);
4890 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4891 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4892 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4893 return;
4894 }
4895 case ISD::INTRINSIC_W_CHAIN: {
4896 unsigned IntNo = Node->getConstantOperandVal(1);
4897 switch (IntNo) {
4898 default:
4899 break;
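// Guarded Control Stack switch: expand to the GCSSS1/GCSSS2 pair, threading
// the chain through both so the two instructions stay ordered; GCSSS2
// produces the intrinsic's i64 result.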
4900 case Intrinsic::aarch64_gcsss: {
4901 SDLoc DL(Node);
4902 SDValue Chain = Node->getOperand(0);
4903 SDValue Val = Node->getOperand(2);
4904 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4905 SDNode *SS1 =
4906 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4907 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4908 MVT::Other, Zero, SDValue(SS1, 0));
4909 ReplaceNode(Node, SS2);
4910 return;
4911 }
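// 128-bit exclusive loads return the two 64-bit halves plus a chain; the
// memory operand is copied over so the machine node keeps its MMO.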
4912 case Intrinsic::aarch64_ldaxp:
4913 case Intrinsic::aarch64_ldxp: {
4914 unsigned Op =
4915 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4916 SDValue MemAddr = Node->getOperand(2);
4917 SDLoc DL(Node);
4918 SDValue Chain = Node->getOperand(0);
4919
4920 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4921 MVT::Other, MemAddr, Chain);
4922
4923 // Transfer memoperands.
4924 MachineMemOperand *MemOp =
4925 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4926 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4927 ReplaceNode(Node, Ld);
4928 return;
4929 }
4930 case Intrinsic::aarch64_stlxp:
4931 case Intrinsic::aarch64_stxp: {
4932 unsigned Op =
4933 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4934 SDLoc DL(Node);
4935 SDValue Chain = Node->getOperand(0);
4936 SDValue ValLo = Node->getOperand(2);
4937 SDValue ValHi = Node->getOperand(3);
4938 SDValue MemAddr = Node->getOperand(4);
4939
4940 // Place arguments in the right order.
4941 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4942
4943 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4944 // Transfer memoperands.
4945 MachineMemOperand *MemOp =
4946 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4947 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4948
4949 ReplaceNode(Node, St);
4950 return;
4951 }
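// NEON structured loads: pick the instruction from the vector type. 64-bit
// vectors use the D-register forms (results extracted starting at dsub0),
// 128-bit vectors the Q-register forms (qsub0). E.g. ld1x2 on v8i8 selects
// LD1Twov8b.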
4952 case Intrinsic::aarch64_neon_ld1x2:
4953 if (VT == MVT::v8i8) {
4954 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4955 return;
4956 } else if (VT == MVT::v16i8) {
4957 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4958 return;
4959 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4960 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4961 return;
4962 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4963 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4964 return;
4965 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4966 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4967 return;
4968 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4969 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4970 return;
4971 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4972 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4973 return;
4974 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4975 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4976 return;
4977 }
4978 break;
4979 case Intrinsic::aarch64_neon_ld1x3:
4980 if (VT == MVT::v8i8) {
4981 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4982 return;
4983 } else if (VT == MVT::v16i8) {
4984 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4985 return;
4986 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4987 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4988 return;
4989 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4990 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4991 return;
4992 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4993 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4994 return;
4995 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4996 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4997 return;
4998 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4999 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5000 return;
5001 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5002 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5003 return;
5004 }
5005 break;
5006 case Intrinsic::aarch64_neon_ld1x4:
5007 if (VT == MVT::v8i8) {
5008 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5009 return;
5010 } else if (VT == MVT::v16i8) {
5011 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5012 return;
5013 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5014 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5015 return;
5016 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5017 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5018 return;
5019 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5020 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5021 return;
5022 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5023 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5024 return;
5025 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5026 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5027 return;
5028 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5029 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5030 return;
5031 }
5032 break;
5033 case Intrinsic::aarch64_neon_ld2:
5034 if (VT == MVT::v8i8) {
5035 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5036 return;
5037 } else if (VT == MVT::v16i8) {
5038 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5039 return;
5040 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5041 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5042 return;
5043 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5044 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5045 return;
5046 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5047 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5048 return;
5049 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5050 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5051 return;
5052 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5053 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5054 return;
5055 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5056 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5057 return;
5058 }
5059 break;
5060 case Intrinsic::aarch64_neon_ld3:
5061 if (VT == MVT::v8i8) {
5062 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5063 return;
5064 } else if (VT == MVT::v16i8) {
5065 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5066 return;
5067 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5068 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5069 return;
5070 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5071 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5072 return;
5073 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5074 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5075 return;
5076 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5077 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5078 return;
5079 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5080 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5081 return;
5082 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5083 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5084 return;
5085 }
5086 break;
5087 case Intrinsic::aarch64_neon_ld4:
5088 if (VT == MVT::v8i8) {
5089 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5090 return;
5091 } else if (VT == MVT::v16i8) {
5092 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5093 return;
5094 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5095 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5096 return;
5097 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5098 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5099 return;
5100 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5101 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5102 return;
5103 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5104 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5105 return;
5106 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5107 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5108 return;
5109 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5110 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5111 return;
5112 }
5113 break;
5114 case Intrinsic::aarch64_neon_ld2r:
5115 if (VT == MVT::v8i8) {
5116 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5117 return;
5118 } else if (VT == MVT::v16i8) {
5119 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5120 return;
5121 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5122 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5123 return;
5124 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5125 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5126 return;
5127 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5128 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5129 return;
5130 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5131 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5132 return;
5133 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5134 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5135 return;
5136 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5137 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5138 return;
5139 }
5140 break;
5141 case Intrinsic::aarch64_neon_ld3r:
5142 if (VT == MVT::v8i8) {
5143 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5144 return;
5145 } else if (VT == MVT::v16i8) {
5146 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5147 return;
5148 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5149 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5150 return;
5151 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5152 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5153 return;
5154 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5155 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5156 return;
5157 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5158 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5159 return;
5160 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5161 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5162 return;
5163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5164 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5165 return;
5166 }
5167 break;
5168 case Intrinsic::aarch64_neon_ld4r:
5169 if (VT == MVT::v8i8) {
5170 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5171 return;
5172 } else if (VT == MVT::v16i8) {
5173 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5174 return;
5175 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5176 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5177 return;
5178 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5179 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5180 return;
5181 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5182 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5183 return;
5184 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5185 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5186 return;
5187 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5188 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5189 return;
5190 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5191 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5192 return;
5193 }
5194 break;
5195 case Intrinsic::aarch64_neon_ld2lane:
5196 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5197 SelectLoadLane(Node, 2, AArch64::LD2i8);
5198 return;
5199 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5200 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5201 SelectLoadLane(Node, 2, AArch64::LD2i16);
5202 return;
5203 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5204 VT == MVT::v2f32) {
5205 SelectLoadLane(Node, 2, AArch64::LD2i32);
5206 return;
5207 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5208 VT == MVT::v1f64) {
5209 SelectLoadLane(Node, 2, AArch64::LD2i64);
5210 return;
5211 }
5212 break;
5213 case Intrinsic::aarch64_neon_ld3lane:
5214 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5215 SelectLoadLane(Node, 3, AArch64::LD3i8);
5216 return;
5217 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5218 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5219 SelectLoadLane(Node, 3, AArch64::LD3i16);
5220 return;
5221 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5222 VT == MVT::v2f32) {
5223 SelectLoadLane(Node, 3, AArch64::LD3i32);
5224 return;
5225 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5226 VT == MVT::v1f64) {
5227 SelectLoadLane(Node, 3, AArch64::LD3i64);
5228 return;
5229 }
5230 break;
5231 case Intrinsic::aarch64_neon_ld4lane:
5232 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5233 SelectLoadLane(Node, 4, AArch64::LD4i8);
5234 return;
5235 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5236 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5237 SelectLoadLane(Node, 4, AArch64::LD4i16);
5238 return;
5239 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5240 VT == MVT::v2f32) {
5241 SelectLoadLane(Node, 4, AArch64::LD4i32);
5242 return;
5243 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5244 VT == MVT::v1f64) {
5245 SelectLoadLane(Node, 4, AArch64::LD4i64);
5246 return;
5247 }
5248 break;
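// LD64B loads 64 bytes into eight consecutive X registers; x8sub_0 is the
// first subregister of the eight-register tuple the results come from.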
5249 case Intrinsic::aarch64_ld64b:
5250 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5251 return;
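// SVE structured loads: SelectPredicatedLoad picks between the reg+imm
// (_IMM) and reg+reg forms of the LD2/LD3/LD4 instructions.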
5252 case Intrinsic::aarch64_sve_ld2q_sret: {
5253 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5254 return;
5255 }
5256 case Intrinsic::aarch64_sve_ld3q_sret: {
5257 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5258 return;
5259 }
5260 case Intrinsic::aarch64_sve_ld4q_sret: {
5261 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5262 return;
5263 }
5264 case Intrinsic::aarch64_sve_ld2_sret: {
5265 if (VT == MVT::nxv16i8) {
5266 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5267 true);
5268 return;
5269 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5270 VT == MVT::nxv8bf16) {
5271 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5272 true);
5273 return;
5274 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5275 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5276 true);
5277 return;
5278 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5279 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5280 true);
5281 return;
5282 }
5283 break;
5284 }
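// Multi-vector loads with a predicate-as-counter operand: in streaming mode
// with SME2 use the pseudo instructions, otherwise require SVE2p1 and use
// the real forms; fall back to the default path if neither is available.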
5285 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5286 if (VT == MVT::nxv16i8) {
5287 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5288 SelectContiguousMultiVectorLoad(
5289 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5290 else if (Subtarget->hasSVE2p1())
5291 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5292 AArch64::LD1B_2Z);
5293 else
5294 break;
5295 return;
5296 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5297 VT == MVT::nxv8bf16) {
5298 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5299 SelectContiguousMultiVectorLoad(
5300 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5301 else if (Subtarget->hasSVE2p1())
5302 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5303 AArch64::LD1H_2Z);
5304 else
5305 break;
5306 return;
5307 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5308 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5309 SelectContiguousMultiVectorLoad(
5310 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5311 else if (Subtarget->hasSVE2p1())
5312 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5313 AArch64::LD1W_2Z);
5314 else
5315 break;
5316 return;
5317 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5318 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5319 SelectContiguousMultiVectorLoad(
5320 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5321 else if (Subtarget->hasSVE2p1())
5322 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5323 AArch64::LD1D_2Z);
5324 else
5325 break;
5326 return;
5327 }
5328 break;
5329 }
5330 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5331 if (VT == MVT::nxv16i8) {
5332 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5333 SelectContiguousMultiVectorLoad(
5334 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5335 else if (Subtarget->hasSVE2p1())
5336 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5337 AArch64::LD1B_4Z);
5338 else
5339 break;
5340 return;
5341 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5342 VT == MVT::nxv8bf16) {
5343 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5344 SelectContiguousMultiVectorLoad(
5345 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5346 else if (Subtarget->hasSVE2p1())
5347 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5348 AArch64::LD1H_4Z);
5349 else
5350 break;
5351 return;
5352 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5353 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5354 SelectContiguousMultiVectorLoad(
5355 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5356 else if (Subtarget->hasSVE2p1())
5357 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5358 AArch64::LD1W_4Z);
5359 else
5360 break;
5361 return;
5362 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5363 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5364 SelectContiguousMultiVectorLoad(
5365 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5366 else if (Subtarget->hasSVE2p1())
5367 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5368 AArch64::LD1D_4Z);
5369 else
5370 break;
5371 return;
5372 }
5373 break;
5374 }
5375 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5376 if (VT == MVT::nxv16i8) {
5377 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5378 SelectContiguousMultiVectorLoad(Node, 2, 0,
5379 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5380 AArch64::LDNT1B_2Z_PSEUDO);
5381 else if (Subtarget->hasSVE2p1())
5382 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5383 AArch64::LDNT1B_2Z);
5384 else
5385 break;
5386 return;
5387 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5388 VT == MVT::nxv8bf16) {
5389 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5390 SelectContiguousMultiVectorLoad(Node, 2, 1,
5391 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5392 AArch64::LDNT1H_2Z_PSEUDO);
5393 else if (Subtarget->hasSVE2p1())
5394 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5395 AArch64::LDNT1H_2Z);
5396 else
5397 break;
5398 return;
5399 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5400 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5401 SelectContiguousMultiVectorLoad(Node, 2, 2,
5402 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5403 AArch64::LDNT1W_2Z_PSEUDO);
5404 else if (Subtarget->hasSVE2p1())
5405 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5406 AArch64::LDNT1W_2Z);
5407 else
5408 break;
5409 return;
5410 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5411 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5412 SelectContiguousMultiVectorLoad(Node, 2, 3,
5413 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5414 AArch64::LDNT1D_2Z_PSEUDO);
5415 else if (Subtarget->hasSVE2p1())
5416 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5417 AArch64::LDNT1D_2Z);
5418 else
5419 break;
5420 return;
5421 }
5422 break;
5423 }
5424 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5425 if (VT == MVT::nxv16i8) {
5426 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5427 SelectContiguousMultiVectorLoad(Node, 4, 0,
5428 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5429 AArch64::LDNT1B_4Z_PSEUDO);
5430 else if (Subtarget->hasSVE2p1())
5431 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5432 AArch64::LDNT1B_4Z);
5433 else
5434 break;
5435 return;
5436 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5437 VT == MVT::nxv8bf16) {
5438 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5439 SelectContiguousMultiVectorLoad(Node, 4, 1,
5440 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5441 AArch64::LDNT1H_4Z_PSEUDO);
5442 else if (Subtarget->hasSVE2p1())
5443 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5444 AArch64::LDNT1H_4Z);
5445 else
5446 break;
5447 return;
5448 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5449 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5450 SelectContiguousMultiVectorLoad(Node, 4, 2,
5451 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5452 AArch64::LDNT1W_4Z_PSEUDO);
5453 else if (Subtarget->hasSVE2p1())
5454 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5455 AArch64::LDNT1W_4Z);
5456 else
5457 break;
5458 return;
5459 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5460 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5461 SelectContiguousMultiVectorLoad(Node, 4, 3,
5462 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5463 AArch64::LDNT1D_4Z_PSEUDO);
5464 else if (Subtarget->hasSVE2p1())
5465 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5466 AArch64::LDNT1D_4Z);
5467 else
5468 break;
5469 return;
5470 }
5471 break;
5472 }
5473 case Intrinsic::aarch64_sve_ld3_sret: {
5474 if (VT == MVT::nxv16i8) {
5475 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5476 true);
5477 return;
5478 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5479 VT == MVT::nxv8bf16) {
5480 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5481 true);
5482 return;
5483 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5484 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5485 true);
5486 return;
5487 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5488 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5489 true);
5490 return;
5491 }
5492 break;
5493 }
5494 case Intrinsic::aarch64_sve_ld4_sret: {
5495 if (VT == MVT::nxv16i8) {
5496 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5497 true);
5498 return;
5499 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5500 VT == MVT::nxv8bf16) {
5501 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5502 true);
5503 return;
5504 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5505 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5506 true);
5507 return;
5508 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5509 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5510 true);
5511 return;
5512 }
5513 break;
5514 }
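// SME ZA reads: MOVA copies two or four horizontal or vertical tile slices
// into an SVE vector tuple, dispatching on the element type.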
5515 case Intrinsic::aarch64_sme_read_hor_vg2: {
5516 if (VT == MVT::nxv16i8) {
5517 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5518 AArch64::MOVA_2ZMXI_H_B);
5519 return;
5520 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5521 VT == MVT::nxv8bf16) {
5522 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5523 AArch64::MOVA_2ZMXI_H_H);
5524 return;
5525 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5526 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5527 AArch64::MOVA_2ZMXI_H_S);
5528 return;
5529 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5530 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5531 AArch64::MOVA_2ZMXI_H_D);
5532 return;
5533 }
5534 break;
5535 }
5536 case Intrinsic::aarch64_sme_read_ver_vg2: {
5537 if (VT == MVT::nxv16i8) {
5538 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5539 AArch64::MOVA_2ZMXI_V_B);
5540 return;
5541 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5542 VT == MVT::nxv8bf16) {
5543 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5544 AArch64::MOVA_2ZMXI_V_H);
5545 return;
5546 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5547 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5548 AArch64::MOVA_2ZMXI_V_S);
5549 return;
5550 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5551 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5552 AArch64::MOVA_2ZMXI_V_D);
5553 return;
5554 }
5555 break;
5556 }
5557 case Intrinsic::aarch64_sme_read_hor_vg4: {
5558 if (VT == MVT::nxv16i8) {
5559 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5560 AArch64::MOVA_4ZMXI_H_B);
5561 return;
5562 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5563 VT == MVT::nxv8bf16) {
5564 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5565 AArch64::MOVA_4ZMXI_H_H);
5566 return;
5567 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5568 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5569 AArch64::MOVA_4ZMXI_H_S);
5570 return;
5571 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5572 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5573 AArch64::MOVA_4ZMXI_H_D);
5574 return;
5575 }
5576 break;
5577 }
5578 case Intrinsic::aarch64_sme_read_ver_vg4: {
5579 if (VT == MVT::nxv16i8) {
5580 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5581 AArch64::MOVA_4ZMXI_V_B);
5582 return;
5583 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5584 VT == MVT::nxv8bf16) {
5585 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5586 AArch64::MOVA_4ZMXI_V_H);
5587 return;
5588 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5589 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5590 AArch64::MOVA_4ZMXI_V_S);
5591 return;
5592 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5593 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5594 AArch64::MOVA_4ZMXI_V_D);
5595 return;
5596 }
5597 break;
5598 }
5599 case Intrinsic::aarch64_sme_read_vg1x2: {
5600 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5601 AArch64::MOVA_VG2_2ZMXI);
5602 return;
5603 }
5604 case Intrinsic::aarch64_sme_read_vg1x4: {
5605 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5606 AArch64::MOVA_VG4_4ZMXI);
5607 return;
5608 }
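// The MOVAZ variants below additionally zero the ZA slices they read.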
5609 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5610 if (VT == MVT::nxv16i8) {
5611 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5612 return;
5613 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5614 VT == MVT::nxv8bf16) {
5615 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5616 return;
5617 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5618 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5619 return;
5620 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5621 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5622 return;
5623 }
5624 break;
5625 }
5626 case Intrinsic::aarch64_sme_readz_vert_x2: {
5627 if (VT == MVT::nxv16i8) {
5628 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5629 return;
5630 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5631 VT == MVT::nxv8bf16) {
5632 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5633 return;
5634 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5635 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5636 return;
5637 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5638 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5639 return;
5640 }
5641 break;
5642 }
5643 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5644 if (VT == MVT::nxv16i8) {
5645 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5646 return;
5647 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5648 VT == MVT::nxv8bf16) {
5649 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5650 return;
5651 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5652 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5653 return;
5654 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5655 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5656 return;
5657 }
5658 break;
5659 }
5660 case Intrinsic::aarch64_sme_readz_vert_x4: {
5661 if (VT == MVT::nxv16i8) {
5662 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5663 return;
5664 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5665 VT == MVT::nxv8bf16) {
5666 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5667 return;
5668 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5669 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5670 return;
5671 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5672 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5673 return;
5674 }
5675 break;
5676 }
5677 case Intrinsic::aarch64_sme_readz_x2: {
5678 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5679 AArch64::ZA);
5680 return;
5681 }
5682 case Intrinsic::aarch64_sme_readz_x4: {
5683 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5684 AArch64::ZA);
5685 return;
5686 }
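// The Swift async context is stored just below the frame pointer, so the
// intrinsic's address is simply FP - 8; record that the frame address is
// taken and that this function uses the async context.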
5687 case Intrinsic::swift_async_context_addr: {
5688 SDLoc DL(Node);
5689 SDValue Chain = Node->getOperand(0);
5690 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5691 SDValue Res = SDValue(
5692 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5693 CurDAG->getTargetConstant(8, DL, MVT::i32),
5694 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5695 0);
5696 ReplaceUses(SDValue(Node, 0), Res);
5697 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5698 CurDAG->RemoveDeadNode(Node);
5699
5700 auto &MF = CurDAG->getMachineFunction();
5701 MF.getFrameInfo().setFrameAddressIsTaken(true);
5702 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5703 return;
5704 }
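// ZT0 lookup-table intrinsics: the trailing argument is the upper bound
// enforced on the lane immediate (noted on each case below).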
5705 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5707 Node->getValueType(0),
5708 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5709 AArch64::LUTI2_4ZTZI_S}))
5710 // Second Immediate must be <= 3:
5711 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5712 return;
5713 }
5714 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5716 Node->getValueType(0),
5717 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5718 // Second Immediate must be <= 1:
5719 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5720 return;
5721 }
5722 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5724 Node->getValueType(0),
5725 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5726 AArch64::LUTI2_2ZTZI_S}))
5727 // Second Immediate must be <= 7:
5728 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5729 return;
5730 }
5731 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5733 Node->getValueType(0),
5734 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5735 AArch64::LUTI4_2ZTZI_S}))
5736 // Second Immediate must be <= 3:
5737 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5738 return;
5739 }
5740 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5741 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5742 return;
5743 }
5744 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5746 Node->getValueType(0),
5747 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5748 SelectCVTIntrinsicFP8(Node, 2, Opc);
5749 return;
5750 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5752 Node->getValueType(0),
5753 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5754 SelectCVTIntrinsicFP8(Node, 2, Opc);
5755 return;
5756 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5758 Node->getValueType(0),
5759 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5760 SelectCVTIntrinsicFP8(Node, 2, Opc);
5761 return;
5762 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5764 Node->getValueType(0),
5765 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5766 SelectCVTIntrinsicFP8(Node, 2, Opc);
5767 return;
5768 }
5769 } break;
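// Chainless intrinsics: here the intrinsic ID is operand 0 rather than 1.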
5770 case ISD::INTRINSIC_WO_CHAIN: {
5771 unsigned IntNo = Node->getConstantOperandVal(0);
5772 switch (IntNo) {
5773 default:
5774 break;
5775 case Intrinsic::aarch64_tagp:
5776 SelectTagP(Node);
5777 return;
5778
5779 case Intrinsic::ptrauth_auth:
5780 SelectPtrauthAuth(Node);
5781 return;
5782
5783 case Intrinsic::ptrauth_resign:
5784 SelectPtrauthResign(Node);
5785 return;
5786
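// NEON table lookups: TBL zeroes lanes whose index is out of range, while
// TBX (final argument 'true') leaves those destination lanes unchanged.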
5787 case Intrinsic::aarch64_neon_tbl2:
5788 SelectTable(Node, 2,
5789 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5790 false);
5791 return;
5792 case Intrinsic::aarch64_neon_tbl3:
5793 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5794 : AArch64::TBLv16i8Three,
5795 false);
5796 return;
5797 case Intrinsic::aarch64_neon_tbl4:
5798 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5799 : AArch64::TBLv16i8Four,
5800 false);
5801 return;
5802 case Intrinsic::aarch64_neon_tbx2:
5803 SelectTable(Node, 2,
5804 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5805 true);
5806 return;
5807 case Intrinsic::aarch64_neon_tbx3:
5808 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5809 : AArch64::TBXv16i8Three,
5810 true);
5811 return;
5812 case Intrinsic::aarch64_neon_tbx4:
5813 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5814 : AArch64::TBXv16i8Four,
5815 true);
5816 return;
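// Destructive multi-vector SVE/SME arithmetic: the '_single' intrinsics take
// one shared right-hand vector (third argument false), while the full
// variants take a matching vector tuple (third argument true).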
5817 case Intrinsic::aarch64_sve_srshl_single_x2:
5819 Node->getValueType(0),
5820 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5821 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5822 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5823 return;
5824 case Intrinsic::aarch64_sve_srshl_single_x4:
5826 Node->getValueType(0),
5827 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5828 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5829 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5830 return;
5831 case Intrinsic::aarch64_sve_urshl_single_x2:
5833 Node->getValueType(0),
5834 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5835 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5836 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5837 return;
5838 case Intrinsic::aarch64_sve_urshl_single_x4:
5840 Node->getValueType(0),
5841 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5842 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5843 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5844 return;
5845 case Intrinsic::aarch64_sve_srshl_x2:
5847 Node->getValueType(0),
5848 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5849 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5850 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5851 return;
5852 case Intrinsic::aarch64_sve_srshl_x4:
5854 Node->getValueType(0),
5855 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5856 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5857 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5858 return;
5859 case Intrinsic::aarch64_sve_urshl_x2:
5861 Node->getValueType(0),
5862 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5863 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5864 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5865 return;
5866 case Intrinsic::aarch64_sve_urshl_x4:
5868 Node->getValueType(0),
5869 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5870 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5871 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5872 return;
5873 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5875 Node->getValueType(0),
5876 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5877 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5878 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5879 return;
5880 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5882 Node->getValueType(0),
5883 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5884 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5885 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5886 return;
5887 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5889 Node->getValueType(0),
5890 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5891 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5892 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5893 return;
5894 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5896 Node->getValueType(0),
5897 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5898 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5899 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5900 return;
5901 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5903 Node->getValueType(0),
5904 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5905 AArch64::FSCALE_2ZZ_D}))
5906 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5907 return;
5908 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5910 Node->getValueType(0),
5911 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5912 AArch64::FSCALE_4ZZ_D}))
5913 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5914 return;
5915 case Intrinsic::aarch64_sme_fp8_scale_x2:
5917 Node->getValueType(0),
5918 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5919 AArch64::FSCALE_2Z2Z_D}))
5920 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5921 return;
5922 case Intrinsic::aarch64_sme_fp8_scale_x4:
5924 Node->getValueType(0),
5925 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5926 AArch64::FSCALE_4Z4Z_D}))
5927 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5928 return;
5929 case Intrinsic::aarch64_sve_whilege_x2:
5931 Node->getValueType(0),
5932 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5933 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5934 SelectWhilePair(Node, Op);
5935 return;
5936 case Intrinsic::aarch64_sve_whilegt_x2:
5938 Node->getValueType(0),
5939 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5940 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5941 SelectWhilePair(Node, Op);
5942 return;
5943 case Intrinsic::aarch64_sve_whilehi_x2:
5945 Node->getValueType(0),
5946 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5947 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5948 SelectWhilePair(Node, Op);
5949 return;
5950 case Intrinsic::aarch64_sve_whilehs_x2:
5952 Node->getValueType(0),
5953 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5954 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5955 SelectWhilePair(Node, Op);
5956 return;
5957 case Intrinsic::aarch64_sve_whilele_x2:
5959 Node->getValueType(0),
5960 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5961 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5962 SelectWhilePair(Node, Op);
5963 return;
5964 case Intrinsic::aarch64_sve_whilelo_x2:
5966 Node->getValueType(0),
5967 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5968 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5969 SelectWhilePair(Node, Op);
5970 return;
5971 case Intrinsic::aarch64_sve_whilels_x2:
5973 Node->getValueType(0),
5974 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5975 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5976 SelectWhilePair(Node, Op);
5977 return;
5978 case Intrinsic::aarch64_sve_whilelt_x2:
5980 Node->getValueType(0),
5981 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5982 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5983 SelectWhilePair(Node, Op);
5984 return;
5985 case Intrinsic::aarch64_sve_smax_single_x2:
5987 Node->getValueType(0),
5988 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5989 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5990 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5991 return;
5992 case Intrinsic::aarch64_sve_umax_single_x2:
5994 Node->getValueType(0),
5995 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5996 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5997 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5998 return;
5999 case Intrinsic::aarch64_sve_fmax_single_x2:
6001 Node->getValueType(0),
6002 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6003 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6004 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6005 return;
6006 case Intrinsic::aarch64_sve_smax_single_x4:
6008 Node->getValueType(0),
6009 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6010 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6011 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6012 return;
6013 case Intrinsic::aarch64_sve_umax_single_x4:
6015 Node->getValueType(0),
6016 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6017 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6018 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6019 return;
6020 case Intrinsic::aarch64_sve_fmax_single_x4:
6022 Node->getValueType(0),
6023 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6024 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6025 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6026 return;
6027 case Intrinsic::aarch64_sve_smin_single_x2:
6029 Node->getValueType(0),
6030 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6031 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6032 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6033 return;
6034 case Intrinsic::aarch64_sve_umin_single_x2:
6036 Node->getValueType(0),
6037 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6038 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6039 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6040 return;
6041 case Intrinsic::aarch64_sve_fmin_single_x2:
6043 Node->getValueType(0),
6044 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6045 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6046 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6047 return;
6048 case Intrinsic::aarch64_sve_smin_single_x4:
6050 Node->getValueType(0),
6051 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6052 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6053 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6054 return;
6055 case Intrinsic::aarch64_sve_umin_single_x4:
6057 Node->getValueType(0),
6058 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6059 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6060 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6061 return;
6062 case Intrinsic::aarch64_sve_fmin_single_x4:
6064 Node->getValueType(0),
6065 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6066 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6067 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6068 return;
6069 case Intrinsic::aarch64_sve_smax_x2:
6071 Node->getValueType(0),
6072 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6073 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6074 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6075 return;
6076 case Intrinsic::aarch64_sve_umax_x2:
6078 Node->getValueType(0),
6079 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6080 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6081 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6082 return;
6083 case Intrinsic::aarch64_sve_fmax_x2:
6085 Node->getValueType(0),
6086 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6087 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6088 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6089 return;
6090 case Intrinsic::aarch64_sve_smax_x4:
6092 Node->getValueType(0),
6093 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6094 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6095 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6096 return;
6097 case Intrinsic::aarch64_sve_umax_x4:
6099 Node->getValueType(0),
6100 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6101 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6102 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_fmax_x4:
6106 Node->getValueType(0),
6107 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6108 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6109 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6110 return;
6111 case Intrinsic::aarch64_sme_famax_x2:
6113 Node->getValueType(0),
6114 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6115 AArch64::FAMAX_2Z2Z_D}))
6116 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6117 return;
6118 case Intrinsic::aarch64_sme_famax_x4:
6120 Node->getValueType(0),
6121 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6122 AArch64::FAMAX_4Z4Z_D}))
6123 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6124 return;
6125 case Intrinsic::aarch64_sme_famin_x2:
6127 Node->getValueType(0),
6128 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6129 AArch64::FAMIN_2Z2Z_D}))
6130 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6131 return;
6132 case Intrinsic::aarch64_sme_famin_x4:
6134 Node->getValueType(0),
6135 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6136 AArch64::FAMIN_4Z4Z_D}))
6137 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6138 return;
6139 case Intrinsic::aarch64_sve_smin_x2:
6141 Node->getValueType(0),
6142 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6143 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6144 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6145 return;
6146 case Intrinsic::aarch64_sve_umin_x2:
6148 Node->getValueType(0),
6149 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6150 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6151 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6152 return;
6153 case Intrinsic::aarch64_sve_fmin_x2:
6155 Node->getValueType(0),
6156 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6157 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6158 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6159 return;
6160 case Intrinsic::aarch64_sve_smin_x4:
6162 Node->getValueType(0),
6163 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6164 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6165 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6166 return;
6167 case Intrinsic::aarch64_sve_umin_x4:
6169 Node->getValueType(0),
6170 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6171 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6172 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6173 return;
6174 case Intrinsic::aarch64_sve_fmin_x4:
6176 Node->getValueType(0),
6177 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6178 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6179 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6183 Node->getValueType(0),
6184 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6185 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6186 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6190 Node->getValueType(0),
6191 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6192 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6193 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6194 return;
6195 case Intrinsic::aarch64_sve_fminnm_single_x2:
6197 Node->getValueType(0),
6198 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6199 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6200 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6201 return;
6202 case Intrinsic::aarch64_sve_fminnm_single_x4:
6204 Node->getValueType(0),
6205 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6206 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6207 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6208 return;
6209 case Intrinsic::aarch64_sve_fscale_single_x4:
6210 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6211 return;
6212 case Intrinsic::aarch64_sve_fscale_single_x2:
6213 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6214 return;
6215 case Intrinsic::aarch64_sve_fmul_single_x4:
6217 Node->getValueType(0),
6218 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6219 AArch64::FMUL_4ZZ_D}))
6220 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6221 return;
6222 case Intrinsic::aarch64_sve_fmul_single_x2:
6224 Node->getValueType(0),
6225 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6226 AArch64::FMUL_2ZZ_D}))
6227 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6228 return;
6229 case Intrinsic::aarch64_sve_fmaxnm_x2:
6231 Node->getValueType(0),
6232 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6233 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6234 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6235 return;
6236 case Intrinsic::aarch64_sve_fmaxnm_x4:
6238 Node->getValueType(0),
6239 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6240 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6241 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6242 return;
6243 case Intrinsic::aarch64_sve_fminnm_x2:
6245 Node->getValueType(0),
6246 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6247 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6248 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6249 return;
6250 case Intrinsic::aarch64_sve_fminnm_x4:
6252 Node->getValueType(0),
6253 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6254 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6255 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6256 return;
6257 case Intrinsic::aarch64_sve_fscale_x4:
6258 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6259 return;
6260 case Intrinsic::aarch64_sve_fscale_x2:
6261 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6262 return;
6263 case Intrinsic::aarch64_sve_fmul_x4:
6265 Node->getValueType(0),
6266 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6267 AArch64::FMUL_4Z4Z_D}))
6268 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6269 return;
6270 case Intrinsic::aarch64_sve_fmul_x2:
6272 Node->getValueType(0),
6273 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6274 AArch64::FMUL_2Z2Z_D}))
6275 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6276 return;
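// These multi-vector int<->fp conversions are selected directly: only the
// single-width .S forms are handled here, so no opcode dispatch on VT.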
6277 case Intrinsic::aarch64_sve_fcvtzs_x2:
6278 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6279 return;
6280 case Intrinsic::aarch64_sve_scvtf_x2:
6281 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6282 return;
6283 case Intrinsic::aarch64_sve_fcvtzu_x2:
6284 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6285 return;
6286 case Intrinsic::aarch64_sve_ucvtf_x2:
6287 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6288 return;
6289 case Intrinsic::aarch64_sve_fcvtzs_x4:
6290 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6291 return;
6292 case Intrinsic::aarch64_sve_scvtf_x4:
6293 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6294 return;
6295 case Intrinsic::aarch64_sve_fcvtzu_x4:
6296 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6297 return;
6298 case Intrinsic::aarch64_sve_ucvtf_x4:
6299 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6300 return;
6301 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6302 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6303 return;
6304 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6305 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6306 return;
6307 case Intrinsic::aarch64_sve_sclamp_single_x2:
6309 Node->getValueType(0),
6310 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6311 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6312 SelectClamp(Node, 2, Op);
6313 return;
6314 case Intrinsic::aarch64_sve_uclamp_single_x2:
6316 Node->getValueType(0),
6317 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6318 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6319 SelectClamp(Node, 2, Op);
6320 return;
6321 case Intrinsic::aarch64_sve_fclamp_single_x2:
6323 Node->getValueType(0),
6324 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6325 AArch64::FCLAMP_VG2_2Z2Z_D}))
6326 SelectClamp(Node, 2, Op);
6327 return;
6328 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6329 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6330 return;
6331 case Intrinsic::aarch64_sve_sclamp_single_x4:
6333 Node->getValueType(0),
6334 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6335 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6336 SelectClamp(Node, 4, Op);
6337 return;
6338 case Intrinsic::aarch64_sve_uclamp_single_x4:
6340 Node->getValueType(0),
6341 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6342 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6343 SelectClamp(Node, 4, Op);
6344 return;
6345 case Intrinsic::aarch64_sve_fclamp_single_x4:
6347 Node->getValueType(0),
6348 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6349 AArch64::FCLAMP_VG4_4Z4Z_D}))
6350 SelectClamp(Node, 4, Op);
6351 return;
6352 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6353 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6354 return;
6355 case Intrinsic::aarch64_sve_add_single_x2:
6357 Node->getValueType(0),
6358 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6359 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6360 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6361 return;
6362 case Intrinsic::aarch64_sve_add_single_x4:
6364 Node->getValueType(0),
6365 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6366 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6367 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6368 return;
6369 case Intrinsic::aarch64_sve_zip_x2:
6371 Node->getValueType(0),
6372 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6373 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6374 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6375 return;
6376 case Intrinsic::aarch64_sve_zipq_x2:
6377 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6378 AArch64::ZIP_VG2_2ZZZ_Q);
6379 return;
6380 case Intrinsic::aarch64_sve_zip_x4:
6382 Node->getValueType(0),
6383 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6384 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6385 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6386 return;
6387 case Intrinsic::aarch64_sve_zipq_x4:
6388 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6389 AArch64::ZIP_VG4_4Z4Z_Q);
6390 return;
6391 case Intrinsic::aarch64_sve_uzp_x2:
6393 Node->getValueType(0),
6394 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6395 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6396 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6397 return;
6398 case Intrinsic::aarch64_sve_uzpq_x2:
6399 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6400 AArch64::UZP_VG2_2ZZZ_Q);
6401 return;
6402 case Intrinsic::aarch64_sve_uzp_x4:
6404 Node->getValueType(0),
6405 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6406 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6407 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6408 return;
6409 case Intrinsic::aarch64_sve_uzpq_x4:
6410 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6411 AArch64::UZP_VG4_4Z4Z_Q);
6412 return;
6413 case Intrinsic::aarch64_sve_sel_x2:
6415 Node->getValueType(0),
6416 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6417 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6418 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6419 return;
6420 case Intrinsic::aarch64_sve_sel_x4:
6422 Node->getValueType(0),
6423 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6424 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6425 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6426 return;
6427 case Intrinsic::aarch64_sve_frinta_x2:
6428 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6429 return;
6430 case Intrinsic::aarch64_sve_frinta_x4:
6431 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6432 return;
6433 case Intrinsic::aarch64_sve_frintm_x2:
6434 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6435 return;
6436 case Intrinsic::aarch64_sve_frintm_x4:
6437 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6438 return;
6439 case Intrinsic::aarch64_sve_frintn_x2:
6440 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6441 return;
6442 case Intrinsic::aarch64_sve_frintn_x4:
6443 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6444 return;
6445 case Intrinsic::aarch64_sve_frintp_x2:
6446 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6447 return;
6448 case Intrinsic::aarch64_sve_frintp_x4:
6449 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6450 return;
6451 case Intrinsic::aarch64_sve_sunpk_x2:
6452 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6453 Node->getValueType(0),
6454 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6455 AArch64::SUNPK_VG2_2ZZ_D}))
6456 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6457 return;
6458 case Intrinsic::aarch64_sve_uunpk_x2:
6459 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6460 Node->getValueType(0),
6461 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6462 AArch64::UUNPK_VG2_2ZZ_D}))
6463 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6464 return;
6465 case Intrinsic::aarch64_sve_sunpk_x4:
6466 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6467 Node->getValueType(0),
6468 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6469 AArch64::SUNPK_VG4_4Z2Z_D}))
6470 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6471 return;
6472 case Intrinsic::aarch64_sve_uunpk_x4:
6473 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6474 Node->getValueType(0),
6475 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6476 AArch64::UUNPK_VG4_4Z2Z_D}))
6477 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6478 return;
6479 case Intrinsic::aarch64_sve_pext_x2: {
6480 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6481 Node->getValueType(0),
6482 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6483 AArch64::PEXT_2PCI_D}))
6484 SelectPExtPair(Node, Op);
6485 return;
6486 }
6487 }
6488 break;
6489 }
6490 case ISD::INTRINSIC_VOID: {
6491 unsigned IntNo = Node->getConstantOperandVal(1);
6492 if (Node->getNumOperands() >= 3)
6493 VT = Node->getOperand(2)->getValueType(0);
6494 switch (IntNo) {
6495 default:
6496 break;
6497 case Intrinsic::aarch64_neon_st1x2: {
6498 if (VT == MVT::v8i8) {
6499 SelectStore(Node, 2, AArch64::ST1Twov8b);
6500 return;
6501 } else if (VT == MVT::v16i8) {
6502 SelectStore(Node, 2, AArch64::ST1Twov16b);
6503 return;
6504 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6505 VT == MVT::v4bf16) {
6506 SelectStore(Node, 2, AArch64::ST1Twov4h);
6507 return;
6508 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6509 VT == MVT::v8bf16) {
6510 SelectStore(Node, 2, AArch64::ST1Twov8h);
6511 return;
6512 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6513 SelectStore(Node, 2, AArch64::ST1Twov2s);
6514 return;
6515 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6516 SelectStore(Node, 2, AArch64::ST1Twov4s);
6517 return;
6518 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6519 SelectStore(Node, 2, AArch64::ST1Twov2d);
6520 return;
6521 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6522 SelectStore(Node, 2, AArch64::ST1Twov1d);
6523 return;
6524 }
6525 break;
6526 }
6527 case Intrinsic::aarch64_neon_st1x3: {
6528 if (VT == MVT::v8i8) {
6529 SelectStore(Node, 3, AArch64::ST1Threev8b);
6530 return;
6531 } else if (VT == MVT::v16i8) {
6532 SelectStore(Node, 3, AArch64::ST1Threev16b);
6533 return;
6534 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6535 VT == MVT::v4bf16) {
6536 SelectStore(Node, 3, AArch64::ST1Threev4h);
6537 return;
6538 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6539 VT == MVT::v8bf16) {
6540 SelectStore(Node, 3, AArch64::ST1Threev8h);
6541 return;
6542 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6543 SelectStore(Node, 3, AArch64::ST1Threev2s);
6544 return;
6545 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6546 SelectStore(Node, 3, AArch64::ST1Threev4s);
6547 return;
6548 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6549 SelectStore(Node, 3, AArch64::ST1Threev2d);
6550 return;
6551 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6552 SelectStore(Node, 3, AArch64::ST1Threev1d);
6553 return;
6554 }
6555 break;
6556 }
6557 case Intrinsic::aarch64_neon_st1x4: {
6558 if (VT == MVT::v8i8) {
6559 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6560 return;
6561 } else if (VT == MVT::v16i8) {
6562 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6563 return;
6564 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6565 VT == MVT::v4bf16) {
6566 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6567 return;
6568 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6569 VT == MVT::v8bf16) {
6570 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6571 return;
6572 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6573 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6574 return;
6575 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6576 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6577 return;
6578 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6579 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6580 return;
6581 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6582 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6583 return;
6584 }
6585 break;
6586 }
6587 case Intrinsic::aarch64_neon_st2: {
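// There is no ST2/ST3/ST4 encoding for single-element 64-bit vectors, so the
// v1i64/v1f64 cases below fall back to the equivalent multi-register ST1 forms.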
6588 if (VT == MVT::v8i8) {
6589 SelectStore(Node, 2, AArch64::ST2Twov8b);
6590 return;
6591 } else if (VT == MVT::v16i8) {
6592 SelectStore(Node, 2, AArch64::ST2Twov16b);
6593 return;
6594 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6595 VT == MVT::v4bf16) {
6596 SelectStore(Node, 2, AArch64::ST2Twov4h);
6597 return;
6598 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6599 VT == MVT::v8bf16) {
6600 SelectStore(Node, 2, AArch64::ST2Twov8h);
6601 return;
6602 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6603 SelectStore(Node, 2, AArch64::ST2Twov2s);
6604 return;
6605 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6606 SelectStore(Node, 2, AArch64::ST2Twov4s);
6607 return;
6608 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6609 SelectStore(Node, 2, AArch64::ST2Twov2d);
6610 return;
6611 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6612 SelectStore(Node, 2, AArch64::ST1Twov1d);
6613 return;
6614 }
6615 break;
6616 }
6617 case Intrinsic::aarch64_neon_st3: {
6618 if (VT == MVT::v8i8) {
6619 SelectStore(Node, 3, AArch64::ST3Threev8b);
6620 return;
6621 } else if (VT == MVT::v16i8) {
6622 SelectStore(Node, 3, AArch64::ST3Threev16b);
6623 return;
6624 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6625 VT == MVT::v4bf16) {
6626 SelectStore(Node, 3, AArch64::ST3Threev4h);
6627 return;
6628 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6629 VT == MVT::v8bf16) {
6630 SelectStore(Node, 3, AArch64::ST3Threev8h);
6631 return;
6632 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6633 SelectStore(Node, 3, AArch64::ST3Threev2s);
6634 return;
6635 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6636 SelectStore(Node, 3, AArch64::ST3Threev4s);
6637 return;
6638 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6639 SelectStore(Node, 3, AArch64::ST3Threev2d);
6640 return;
6641 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6642 SelectStore(Node, 3, AArch64::ST1Threev1d);
6643 return;
6644 }
6645 break;
6646 }
6647 case Intrinsic::aarch64_neon_st4: {
6648 if (VT == MVT::v8i8) {
6649 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6650 return;
6651 } else if (VT == MVT::v16i8) {
6652 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6653 return;
6654 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6655 VT == MVT::v4bf16) {
6656 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6657 return;
6658 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6659 VT == MVT::v8bf16) {
6660 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6661 return;
6662 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6663 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6664 return;
6665 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6666 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6667 return;
6668 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6669 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6670 return;
6671 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6672 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6673 return;
6674 }
6675 break;
6676 }
6677 case Intrinsic::aarch64_neon_st2lane: {
6678 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6679 SelectStoreLane(Node, 2, AArch64::ST2i8);
6680 return;
6681 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6682 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6683 SelectStoreLane(Node, 2, AArch64::ST2i16);
6684 return;
6685 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6686 VT == MVT::v2f32) {
6687 SelectStoreLane(Node, 2, AArch64::ST2i32);
6688 return;
6689 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6690 VT == MVT::v1f64) {
6691 SelectStoreLane(Node, 2, AArch64::ST2i64);
6692 return;
6693 }
6694 break;
6695 }
6696 case Intrinsic::aarch64_neon_st3lane: {
6697 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6698 SelectStoreLane(Node, 3, AArch64::ST3i8);
6699 return;
6700 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6701 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6702 SelectStoreLane(Node, 3, AArch64::ST3i16);
6703 return;
6704 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6705 VT == MVT::v2f32) {
6706 SelectStoreLane(Node, 3, AArch64::ST3i32);
6707 return;
6708 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6709 VT == MVT::v1f64) {
6710 SelectStoreLane(Node, 3, AArch64::ST3i64);
6711 return;
6712 }
6713 break;
6714 }
6715 case Intrinsic::aarch64_neon_st4lane: {
6716 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6717 SelectStoreLane(Node, 4, AArch64::ST4i8);
6718 return;
6719 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6720 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6721 SelectStoreLane(Node, 4, AArch64::ST4i16);
6722 return;
6723 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6724 VT == MVT::v2f32) {
6725 SelectStoreLane(Node, 4, AArch64::ST4i32);
6726 return;
6727 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6728 VT == MVT::v1f64) {
6729 SelectStoreLane(Node, 4, AArch64::ST4i64);
6730 return;
6731 }
6732 break;
6733 }
6734 case Intrinsic::aarch64_sve_st2q: {
6735 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6736 return;
6737 }
6738 case Intrinsic::aarch64_sve_st3q: {
6739 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6740 return;
6741 }
6742 case Intrinsic::aarch64_sve_st4q: {
6743 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6744 return;
6745 }
6746 case Intrinsic::aarch64_sve_st2: {
6747 if (VT == MVT::nxv16i8) {
6748 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6749 return;
6750 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6751 VT == MVT::nxv8bf16) {
6752 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6753 return;
6754 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6755 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6756 return;
6757 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6758 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6759 return;
6760 }
6761 break;
6762 }
6763 case Intrinsic::aarch64_sve_st3: {
6764 if (VT == MVT::nxv16i8) {
6765 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6766 return;
6767 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6768 VT == MVT::nxv8bf16) {
6769 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6770 return;
6771 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6772 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6773 return;
6774 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6775 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6776 return;
6777 }
6778 break;
6779 }
6780 case Intrinsic::aarch64_sve_st4: {
6781 if (VT == MVT::nxv16i8) {
6782 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6783 return;
6784 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6785 VT == MVT::nxv8bf16) {
6786 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6787 return;
6788 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6789 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6790 return;
6791 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6792 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6793 return;
6794 }
6795 break;
6796 }
6797 }
6798 break;
6799 }
6800 case AArch64ISD::LD2post: {
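// The sub-register index passed to SelectPostLoad determines how results are
// extracted from the register tuple: dsub0 for 64-bit (D-register) tuples and
// qsub0 for 128-bit (Q-register) tuples.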
6801 if (VT == MVT::v8i8) {
6802 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6803 return;
6804 } else if (VT == MVT::v16i8) {
6805 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6806 return;
6807 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6808 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6809 return;
6810 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6811 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6812 return;
6813 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6814 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6815 return;
6816 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6817 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6818 return;
6819 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6820 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6821 return;
6822 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6823 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6824 return;
6825 }
6826 break;
6827 }
6828 case AArch64ISD::LD3post: {
6829 if (VT == MVT::v8i8) {
6830 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6831 return;
6832 } else if (VT == MVT::v16i8) {
6833 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6834 return;
6835 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6836 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6837 return;
6838 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6839 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6840 return;
6841 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6842 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6843 return;
6844 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6845 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6846 return;
6847 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6848 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6849 return;
6850 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6851 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6852 return;
6853 }
6854 break;
6855 }
6856 case AArch64ISD::LD4post: {
6857 if (VT == MVT::v8i8) {
6858 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6859 return;
6860 } else if (VT == MVT::v16i8) {
6861 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6862 return;
6863 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6864 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6865 return;
6866 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6867 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6868 return;
6869 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6870 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6871 return;
6872 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6873 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6874 return;
6875 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6876 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6877 return;
6878 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6879 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6880 return;
6881 }
6882 break;
6883 }
6884 case AArch64ISD::LD1x2post: {
6885 if (VT == MVT::v8i8) {
6886 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6887 return;
6888 } else if (VT == MVT::v16i8) {
6889 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6890 return;
6891 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6892 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6893 return;
6894 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6895 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6896 return;
6897 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6898 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6899 return;
6900 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6901 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6902 return;
6903 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6904 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6905 return;
6906 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6907 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6908 return;
6909 }
6910 break;
6911 }
6912 case AArch64ISD::LD1x3post: {
6913 if (VT == MVT::v8i8) {
6914 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6915 return;
6916 } else if (VT == MVT::v16i8) {
6917 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6918 return;
6919 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6920 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6921 return;
6922 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6923 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6924 return;
6925 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6926 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6927 return;
6928 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6929 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6930 return;
6931 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6932 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6933 return;
6934 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6935 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6936 return;
6937 }
6938 break;
6939 }
6940 case AArch64ISD::LD1x4post: {
6941 if (VT == MVT::v8i8) {
6942 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6943 return;
6944 } else if (VT == MVT::v16i8) {
6945 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6946 return;
6947 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6948 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6949 return;
6950 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6951 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6952 return;
6953 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6954 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6955 return;
6956 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6957 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6958 return;
6959 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6960 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6961 return;
6962 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6963 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6964 return;
6965 }
6966 break;
6967 }
6968 case AArch64ISD::LD1DUPpost: {
6969 if (VT == MVT::v8i8) {
6970 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6971 return;
6972 } else if (VT == MVT::v16i8) {
6973 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6974 return;
6975 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6976 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6977 return;
6978 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6979 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6980 return;
6981 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6982 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6983 return;
6984 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6985 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6986 return;
6987 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6988 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6989 return;
6990 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6991 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6992 return;
6993 }
6994 break;
6995 }
6996 case AArch64ISD::LD2DUPpost: {
6997 if (VT == MVT::v8i8) {
6998 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6999 return;
7000 } else if (VT == MVT::v16i8) {
7001 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7002 return;
7003 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7004 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7005 return;
7006 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7007 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7008 return;
7009 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7010 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7011 return;
7012 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7013 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7014 return;
7015 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7016 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7017 return;
7018 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7019 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7020 return;
7021 }
7022 break;
7023 }
7024 case AArch64ISD::LD3DUPpost: {
7025 if (VT == MVT::v8i8) {
7026 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7027 return;
7028 } else if (VT == MVT::v16i8) {
7029 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7030 return;
7031 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7032 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7033 return;
7034 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7035 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7036 return;
7037 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7038 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7039 return;
7040 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7041 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7042 return;
7043 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7044 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7045 return;
7046 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7047 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7048 return;
7049 }
7050 break;
7051 }
7052 case AArch64ISD::LD4DUPpost: {
7053 if (VT == MVT::v8i8) {
7054 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7055 return;
7056 } else if (VT == MVT::v16i8) {
7057 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7058 return;
7059 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7060 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7061 return;
7062 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7063 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7064 return;
7065 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7066 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7067 return;
7068 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7069 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7070 return;
7071 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7072 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7073 return;
7074 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7075 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7076 return;
7077 }
7078 break;
7079 }
7080 case AArch64ISD::LD1LANEpost: {
7081 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7082 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7083 return;
7084 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7085 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7086 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7087 return;
7088 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7089 VT == MVT::v2f32) {
7090 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7091 return;
7092 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7093 VT == MVT::v1f64) {
7094 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7095 return;
7096 }
7097 break;
7098 }
7099 case AArch64ISD::LD2LANEpost: {
7100 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7101 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7102 return;
7103 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7104 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7105 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7106 return;
7107 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7108 VT == MVT::v2f32) {
7109 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7110 return;
7111 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7112 VT == MVT::v1f64) {
7113 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7114 return;
7115 }
7116 break;
7117 }
7118 case AArch64ISD::LD3LANEpost: {
7119 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7120 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7121 return;
7122 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7123 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7124 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7125 return;
7126 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7127 VT == MVT::v2f32) {
7128 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7129 return;
7130 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7131 VT == MVT::v1f64) {
7132 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7133 return;
7134 }
7135 break;
7136 }
7137 case AArch64ISD::LD4LANEpost: {
7138 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7139 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7140 return;
7141 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7142 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7143 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7144 return;
7145 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7146 VT == MVT::v2f32) {
7147 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7148 return;
7149 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7150 VT == MVT::v1f64) {
7151 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7152 return;
7153 }
7154 break;
7155 }
7156 case AArch64ISD::ST2post: {
7157 VT = Node->getOperand(1).getValueType();
7158 if (VT == MVT::v8i8) {
7159 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7160 return;
7161 } else if (VT == MVT::v16i8) {
7162 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7163 return;
7164 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7165 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7166 return;
7167 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7168 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7169 return;
7170 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7171 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7172 return;
7173 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7174 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7175 return;
7176 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7177 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7178 return;
7179 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7180 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7181 return;
7182 }
7183 break;
7184 }
7185 case AArch64ISD::ST3post: {
7186 VT = Node->getOperand(1).getValueType();
7187 if (VT == MVT::v8i8) {
7188 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7189 return;
7190 } else if (VT == MVT::v16i8) {
7191 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7192 return;
7193 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7194 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7195 return;
7196 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7197 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7198 return;
7199 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7200 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7201 return;
7202 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7203 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7204 return;
7205 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7206 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7207 return;
7208 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7209 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7210 return;
7211 }
7212 break;
7213 }
7214 case AArch64ISD::ST4post: {
7215 VT = Node->getOperand(1).getValueType();
7216 if (VT == MVT::v8i8) {
7217 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7218 return;
7219 } else if (VT == MVT::v16i8) {
7220 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7221 return;
7222 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7223 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7224 return;
7225 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7226 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7227 return;
7228 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7229 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7230 return;
7231 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7232 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7233 return;
7234 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7235 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7236 return;
7237 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7238 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7239 return;
7240 }
7241 break;
7242 }
7243 case AArch64ISD::ST1x2post: {
7244 VT = Node->getOperand(1).getValueType();
7245 if (VT == MVT::v8i8) {
7246 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7247 return;
7248 } else if (VT == MVT::v16i8) {
7249 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7250 return;
7251 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7252 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7253 return;
7254 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7255 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7256 return;
7257 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7258 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7259 return;
7260 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7261 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7262 return;
7263 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7264 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7265 return;
7266 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7267 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7268 return;
7269 }
7270 break;
7271 }
7272 case AArch64ISD::ST1x3post: {
7273 VT = Node->getOperand(1).getValueType();
7274 if (VT == MVT::v8i8) {
7275 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7276 return;
7277 } else if (VT == MVT::v16i8) {
7278 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7279 return;
7280 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7281 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7282 return;
7283 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7284 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7285 return;
7286 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7287 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7288 return;
7289 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7290 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7291 return;
7292 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7293 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7294 return;
7295 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7296 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7297 return;
7298 }
7299 break;
7300 }
7301 case AArch64ISD::ST1x4post: {
7302 VT = Node->getOperand(1).getValueType();
7303 if (VT == MVT::v8i8) {
7304 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7305 return;
7306 } else if (VT == MVT::v16i8) {
7307 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7308 return;
7309 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7310 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7311 return;
7312 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7313 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7314 return;
7315 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7316 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7317 return;
7318 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7319 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7320 return;
7321 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7322 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7323 return;
7324 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7325 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7326 return;
7327 }
7328 break;
7329 }
7330 case AArch64ISD::ST2LANEpost: {
7331 VT = Node->getOperand(1).getValueType();
7332 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7333 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7334 return;
7335 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7336 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7337 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7338 return;
7339 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7340 VT == MVT::v2f32) {
7341 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7342 return;
7343 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7344 VT == MVT::v1f64) {
7345 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7346 return;
7347 }
7348 break;
7349 }
7350 case AArch64ISD::ST3LANEpost: {
7351 VT = Node->getOperand(1).getValueType();
7352 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7353 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7354 return;
7355 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7356 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7357 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7358 return;
7359 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7360 VT == MVT::v2f32) {
7361 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7362 return;
7363 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7364 VT == MVT::v1f64) {
7365 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7366 return;
7367 }
7368 break;
7369 }
7370 case AArch64ISD::ST4LANEpost: {
7371 VT = Node->getOperand(1).getValueType();
7372 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7373 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7374 return;
7375 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7376 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7377 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7378 return;
7379 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7380 VT == MVT::v2f32) {
7381 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7382 return;
7383 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7384 VT == MVT::v1f64) {
7385 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7386 return;
7387 }
7388 break;
7389 }
7390 }
7391
7392 // Select the default instruction
7393 SelectCode(Node);
7394}
7395
7396 /// createAArch64ISelDag - This pass converts a legalized DAG into an
7397/// AArch64-specific DAG, ready for instruction scheduling.
7398 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7399 CodeGenOptLevel OptLevel) {
7400 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7401}
7402
7403/// When \p PredVT is a scalable vector predicate in the form
7404 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7405 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7406 /// structured vectors (NumVec > 1), the output data type is
7407/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7408/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7409/// EVT.
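/// For example, MVT::nxv4i1 yields MVT::nxv4i32 (4 x 32 = 128), and with
/// NumVec = 2 it yields MVT::nxv8i32.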
7410 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7411 unsigned NumVec) {
7412 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7413 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7414 return EVT();
7415
7416 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7417 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7418 return EVT();
7419
7420 ElementCount EC = PredVT.getVectorElementCount();
7421 EVT ScalarVT =
7422 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7423 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7424
7425 return MemVT;
7426}
7427
7428 /// Return the EVT of the data associated with a memory operation in \p
7429 /// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
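/// For example, an extending masked load with result type nxv4i32 and memory
/// type nxv4i16 is reported as nxv4i16.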
7430 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7431 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7432 return MemIntr->getMemoryVT();
7433
7434 if (isa<MemSDNode>(Root)) {
7435 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7436
7437 EVT DataVT;
7438 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7439 DataVT = Load->getValueType(0);
7440 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7441 DataVT = Load->getValueType(0);
7442 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7443 DataVT = Store->getValue().getValueType();
7444 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7445 DataVT = Store->getValue().getValueType();
7446 else
7447 llvm_unreachable("Unexpected MemSDNode!");
7448
7449 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7450 }
7451
7452 const unsigned Opcode = Root->getOpcode();
7453 // For custom ISD nodes, we have to look at them individually to extract the
7454 // type of the data moved to/from memory.
7455 switch (Opcode) {
7456 case AArch64ISD::LD1_MERGE_ZERO:
7457 case AArch64ISD::LD1S_MERGE_ZERO:
7458 case AArch64ISD::LDNF1_MERGE_ZERO:
7459 case AArch64ISD::LDNF1S_MERGE_ZERO:
7460 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7461 case AArch64ISD::ST1_PRED:
7462 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7463 default:
7464 break;
7465 }
7466
7467 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7468 return EVT();
7469
7470 switch (Root->getConstantOperandVal(1)) {
7471 default:
7472 return EVT();
7473 case Intrinsic::aarch64_sme_ldr:
7474 case Intrinsic::aarch64_sme_str:
7475 return MVT::nxv16i8;
7476 case Intrinsic::aarch64_sve_prf:
7477 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7478 // width of the predicate.
7479 return getPackedVectorTypeFromPredicateType(
7480 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7481 case Intrinsic::aarch64_sve_ld2_sret:
7482 case Intrinsic::aarch64_sve_ld2q_sret:
7483 return getPackedVectorTypeFromPredicateType(
7484 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7485 case Intrinsic::aarch64_sve_st2q:
7486 return getPackedVectorTypeFromPredicateType(
7487 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7488 case Intrinsic::aarch64_sve_ld3_sret:
7489 case Intrinsic::aarch64_sve_ld3q_sret:
7490 return getPackedVectorTypeFromPredicateType(
7491 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7492 case Intrinsic::aarch64_sve_st3q:
7493 return getPackedVectorTypeFromPredicateType(
7494 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7495 case Intrinsic::aarch64_sve_ld4_sret:
7496 case Intrinsic::aarch64_sve_ld4q_sret:
7497 return getPackedVectorTypeFromPredicateType(
7498 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7499 case Intrinsic::aarch64_sve_st4q:
7500 return getPackedVectorTypeFromPredicateType(
7501 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7502 case Intrinsic::aarch64_sve_ld1udq:
7503 case Intrinsic::aarch64_sve_st1dq:
7504 return EVT(MVT::nxv1i64);
7505 case Intrinsic::aarch64_sve_ld1uwq:
7506 case Intrinsic::aarch64_sve_st1wq:
7507 return EVT(MVT::nxv1i32);
7508 }
7509}
7510
7511/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7512 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
7513/// where Root is the memory access using N for its address.
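/// For example, (add x0, (vscale * 32)) with a 16-byte MemVT such as nxv16i8
/// selects Base = x0 and OffImm = 2 (the [x0, #2, mul vl] form), provided the
/// resulting offset lies within [Min, Max].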
7514template <int64_t Min, int64_t Max>
7515bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7516 SDValue &Base,
7517 SDValue &OffImm) {
7518 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7519 const DataLayout &DL = CurDAG->getDataLayout();
7520 const MachineFrameInfo &MFI = MF->getFrameInfo();
7521
7522 if (N.getOpcode() == ISD::FrameIndex) {
7523 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7524 // We can only encode VL scaled offsets, so only fold in frame indexes
7525 // referencing SVE objects.
7526 if (MFI.hasScalableStackID(FI)) {
7527 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7528 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7529 return true;
7530 }
7531
7532 return false;
7533 }
7534
7535 if (MemVT == EVT())
7536 return false;
7537
7538 if (N.getOpcode() != ISD::ADD)
7539 return false;
7540
7541 SDValue VScale = N.getOperand(1);
7542 int64_t MulImm = std::numeric_limits<int64_t>::max();
7543 if (VScale.getOpcode() == ISD::VSCALE) {
7544 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7545 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7546 int64_t ByteOffset = C->getSExtValue();
7547 const auto KnownVScale =
7548 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7549
7550 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7551 return false;
7552
7553 MulImm = ByteOffset / KnownVScale;
7554 } else
7555 return false;
7556
7557 TypeSize TS = MemVT.getSizeInBits();
7558 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7559
7560 if ((MulImm % MemWidthBytes) != 0)
7561 return false;
7562
7563 int64_t Offset = MulImm / MemWidthBytes;
7564 if (Offset < Min || Offset > Max)
7565 return false;
7566
7567 Base = N.getOperand(0);
7568 if (Base.getOpcode() == ISD::FrameIndex) {
7569 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7570 // We can only encode VL scaled offsets, so only fold in frame indexes
7571 // referencing SVE objects.
7572 if (MFI.hasScalableStackID(FI))
7573 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7574 }
7575
7576 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7577 return true;
7578}
7579
7580/// Select register plus register addressing mode for SVE, with scaled
7581/// offset.
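/// For example, with Scale == 3, (add x0, (shl x1, 3)) is matched as
/// Base = x0 and Offset = x1, i.e. the [x0, x1, lsl #3] addressing form.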
7582bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7583 SDValue &Base,
7584 SDValue &Offset) {
7585 if (N.getOpcode() != ISD::ADD)
7586 return false;
7587
7588 // Process an ADD node.
7589 const SDValue LHS = N.getOperand(0);
7590 const SDValue RHS = N.getOperand(1);
7591
7592 // 8-bit data does not come with an SHL node, so it is treated
7593 // separately.
7594 if (Scale == 0) {
7595 Base = LHS;
7596 Offset = RHS;
7597 return true;
7598 }
7599
7600 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7601 int64_t ImmOff = C->getSExtValue();
7602 unsigned Size = 1 << Scale;
7603
7604 // To use the reg+reg addressing mode, the immediate must be a multiple of
7605 // the vector element's byte size.
7606 if (ImmOff % Size)
7607 return false;
7608
7609 SDLoc DL(N);
7610 Base = LHS;
7611 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7612 SDValue Ops[] = {Offset};
7613 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7614 Offset = SDValue(MI, 0);
7615 return true;
7616 }
7617
7618 // Check if the RHS is a shift node with a constant.
7619 if (RHS.getOpcode() != ISD::SHL)
7620 return false;
7621
7622 const SDValue ShiftRHS = RHS.getOperand(1);
7623 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7624 if (C->getZExtValue() == Scale) {
7625 Base = LHS;
7626 Offset = RHS.getOperand(0);
7627 return true;
7628 }
7629
7630 return false;
7631}
7632
7633bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7634 const AArch64TargetLowering *TLI =
7635 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7636
7637 return TLI->isAllActivePredicate(*CurDAG, N);
7638}
7639
7640bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7641 EVT VT = N.getValueType();
7642 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7643}
7644
7645bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7646 SDValue &Base, SDValue &Offset,
7647 unsigned Scale) {
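// Try to match the slice as a plain constant offset, then as 'reg + constant',
// where the constant must be a positive multiple of Scale no larger than
// MaxSize; otherwise fall back to matching the whole expression as 'reg + 0'.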
7648 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7649 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7650 int64_t ImmOff = C->getSExtValue();
7651 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7652 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7653 }
7654 return SDValue();
7655 };
7656
7657 if (SDValue C = MatchConstantOffset(N)) {
7658 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7659 Offset = C;
7660 return true;
7661 }
7662
7663 // Try to untangle an ADD node into a 'reg + offset'
7664 if (CurDAG->isBaseWithConstantOffset(N)) {
7665 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7666 Base = N.getOperand(0);
7667 Offset = C;
7668 return true;
7669 }
7670 }
7671
7672 // By default, just match reg + 0.
7673 Base = N;
7674 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7675 return true;
7676}
7677
7678bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7679 SDValue &Imm) {
7680 AArch64CC::CondCode CC =
7681 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7682 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7683 // Check conservatively if the immediate fits the valid range [0, 64).
7684 // Immediate variants for GE and HS definitely need to be decremented
7685 // when lowering the pseudos later, so an immediate of 1 would become 0.
7686 // For the inverse conditions LT and LO we don't know for sure if they
7687 // will need a decrement but should the decision be made to reverse the
7688 // branch condition, we again end up with the need to decrement.
7689 // The same argument holds for LE, LS, GT and HI and possibly
7690 // incremented immediates. This can lead to slightly less optimal
7691 // codegen, e.g. we never codegen the legal case
7692 // cblt w0, #63, A
7693 // because we could end up with the illegal case
7694 // cbge w0, #64, B
7695 // should the decision to reverse the branch direction be made. For the
7696 // lower bound cases this is no problem since we can express comparisons
7697 // against 0 with either tbz/tbnz or using wzr/xzr.
7698 uint64_t LowerBound = 0, UpperBound = 64;
7699 switch (CC) {
7700 case AArch64CC::GE:
7701 case AArch64CC::HS:
7702 case AArch64CC::LT:
7703 case AArch64CC::LO:
7704 LowerBound = 1;
7705 break;
7706 case AArch64CC::LE:
7707 case AArch64CC::LS:
7708 case AArch64CC::GT:
7709 case AArch64CC::HI:
7710 UpperBound = 63;
7711 break;
7712 default:
7713 break;
7714 }
7715
7716 if (CN->getAPIntValue().uge(LowerBound) &&
7717 CN->getAPIntValue().ult(UpperBound)) {
7718 SDLoc DL(N);
7719 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7720 return true;
7721 }
7722 }
7723
7724 return false;
7725}
7726
7727template <bool MatchCBB>
7728bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7729 SDValue &ExtType) {
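// When MatchCBB is true this matches a byte-sized (i8) source for the extended
// compare-and-branch form; otherwise a halfword-sized (i16) source is matched.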
7730
7731 // Use an invalid shift-extend value to indicate we don't need to extend later
7732 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7733 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7734 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7735 return false;
7736 Reg = N.getOperand(0);
7737 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7738 SDLoc(N), MVT::i32);
7739 return true;
7740 }
7741
7742 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7743
7744 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7745 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7746 Reg = N.getOperand(0);
7747 ExtType =
7748 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7749 return true;
7750 }
7751
7752 return false;
7753}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1453
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
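These APInt queries are the backbone of the bitfield-matching logic: the selector repeatedly asks whether a mask is a single contiguous run of ones, where that run starts, and how wide it is. The following is a small self-contained example of those queries, not code from this file.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // 0x00FF0000: a contiguous run of 8 ones shifted left by 16.
  APInt Mask(64, 0x00FF0000ULL);
  assert(Mask.isShiftedMask());      // non-empty run of ones, remainder zero
  unsigned LSB = Mask.countr_zero(); // 16: position of the lowest set bit
  unsigned Width = Mask.popcount();  // 8: number of bits in the run
  APInt Run = Mask.lshr(LSB).trunc(Width); // the run itself, as an all-ones value
  (void)LSB; (void)Width; (void)Run;
  return 0;
}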
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
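These SDNode/SDValue accessors are how the selector pattern-matches DAG shapes by hand. As a hedged illustration (not a matcher from this file), a helper that recognizes (and X, constant) with a single use and reports the mask could look like this:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch: return true if V is (and X, C) with a constant RHS and one use,
// and report X and C. Purely illustrative; the real matchers do much more.
static bool matchAndWithConstant(SDValue V, SDValue &X, uint64_t &MaskVal) {
  if (V.getOpcode() != ISD::AND || !V.hasOneUse())
    return false;
  auto *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!C)
    return false;
  X = V.getOperand(0);
  MaskVal = C->getZExtValue();
  return true;
}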
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruction selectors.
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type, Target opcode, and operands.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
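Taken together, getMachineNode and getTargetInsertSubreg are the usual way to place a 64-bit NEON value into the low half of an undefined 128-bit register, which is what the WidenVector helper summarized earlier does. A sketch of that idiom follows; the AArch64::dsub subregister index and the include path are assumptions about the in-tree AArch64 backend rather than a quotation of this file.

#include "MCTargetDesc/AArch64MCTargetDesc.h" // assumed: provides AArch64::dsub
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Sketch: widen a value in the 64-bit vector register class to 128 bits by
// inserting it into the dsub subregister of an IMPLICIT_DEF.
static SDValue widenToV128(SDValue V64, SelectionDAG &DAG) {
  EVT NarrowVT = V64.getValueType();
  MVT EltTy = NarrowVT.getVectorElementType().getSimpleVT();
  MVT WideVT = MVT::getVectorVT(EltTy, 2 * NarrowVT.getVectorNumElements());
  SDLoc DL(V64);
  SDValue Undef =
      SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideVT), 0);
  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideVT, Undef, V64);
}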
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:963
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) into the integer value it represents with regSize bits.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 ==> uxtx 100 ==> sxtb 101 ==> sxth 110 ==> sxtw 111 ==> sxtx
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==> lsr 010 ==> asr 011 ==> ror 111 ==> msl
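These AArch64_AM helpers round-trip the encodings used by logical-immediate and shifted-register operands. A minimal usage sketch is shown below; it assumes the in-tree header MCTargetDesc/AArch64AddressingModes.h and uses encodeLogicalImmediate from the same header even though it is not listed above.

#include "MCTargetDesc/AArch64AddressingModes.h" // in-tree AArch64 header (assumed path)
#include <cassert>
#include <cstdint>
using namespace llvm;

int main() {
  // 0x00FF00FF00FF00FF is a replicated bit pattern, hence a valid 64-bit
  // logical immediate; encode it and decode it back.
  uint64_t Val = 0x00FF00FF00FF00FFULL;
  assert(AArch64_AM::isLogicalImmediate(Val, 64));
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Val, 64);
  assert(AArch64_AM::decodeLogicalImmediate(Enc, 64) == Val);

  // Shifter operands pack a shift type and a 6-bit amount into one immediate.
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL);
  assert(AArch64_AM::getShiftValue(Shifter) == 12);
  return 0;
}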
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:595
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:847
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:987
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:838
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:671
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:609
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the named register global variables extension.
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:844
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a scalable vector.
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:882
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:850
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition ISDOpcodes.h:208
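The ISD opcodes above are what a DAG-to-DAG selector dispatches on before any target-specific matching happens. The general shape of that dispatch is sketched below; it is a simplified placeholder, not the file's actual Select routine.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch: the outer dispatch a selector performs. Each case would fall
// through to a dedicated matcher; here they are only placeholders.
static void classifyForSelection(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
  case ISD::SHL:
    // Candidate for bitfield extract/insert (UBFM/SBFM/BFM) matching.
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    // Operand 0 holds the intrinsic ID for chainless target intrinsics.
    (void)N->getConstantOperandVal(0);
    break;
  case ISD::LOAD:
    // Indexed loads are matched against the target's addressing modes.
    break;
  default:
    break; // Everything else is left to the generated matcher tables.
  }
}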
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2016
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version).
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
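The scalar bit-twiddling helpers here mirror the APInt queries: given a 64-bit AND mask, the selector wants to know whether it is a single run of ones and, if so, its start and width, and whether those values fit the 6-bit fields of a bitfield instruction. A standalone sketch (not taken from this file) using these helpers:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Mask = 0x0000000000FF0000ULL;       // eight ones starting at bit 16
  assert(llvm::isShiftedMask_64(Mask));        // single contiguous run of ones
  unsigned LSB = llvm::countr_zero(Mask);      // 16
  unsigned Width = llvm::countr_one(Mask >> LSB); // 8
  assert(llvm::isMask_64(Mask >> LSB));        // after the shift, ones reach bit 0
  assert(llvm::isUInt<6>(LSB) && llvm::isUInt<6>(Width)); // fits a 6-bit field
  (void)Width;
  return 0;
}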
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG, ready for instruction scheduling.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
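The EVT/MVT queries listed above gate most of the vector paths in this selector: fixed versus scalable, 64-bit versus 128-bit, element count and element width. A short hedged sketch of such a gate (the helper names are illustrative):

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Sketch: decide whether a value type looks like a NEON D- or Q-register
// vector (fixed-length, 64 or 128 bits total). Purely illustrative.
static bool isNeonSizedFixedVector(EVT VT) {
  if (!VT.isVector() || VT.isScalableVector())
    return false;
  return VT.is64BitVector() || VT.is128BitVector();
}

// Sketch: element count of the doubled (Q-register) form of a D-sized vector.
static unsigned widenedElementCount(EVT VT) {
  return 2 * VT.getVectorNumElements();
}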
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
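computeKnownBits (listed with the SelectionDAG helpers above) returns a KnownBits whose width matches the queried value; a selector can use it, for example, to prove that high bits are already zero before dropping an explicit mask. The helper below is a hedged sketch of that kind of check, not code from this file.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Sketch: return true if DAG analysis proves the top (BitWidth - LowBits)
// bits of V are zero, i.e. an explicit zero-extension mask would be redundant.
static bool highBitsKnownZero(SelectionDAG &DAG, SDValue V, unsigned LowBits) {
  KnownBits Known = DAG.computeKnownBits(V);
  unsigned BW = Known.getBitWidth();
  if (LowBits >= BW)
    return true;
  APInt HighMask = APInt::getBitsSet(BW, LowBits, BW); // bits [LowBits, BW)
  return (Known.Zero & HighMask) == HighMask;          // all of them known zero
}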
Matching combinators.