LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64.h"
17#include "llvm/ADT/APSInt.h"
20#include "llvm/IR/Function.h" // To access function attributes.
21#include "llvm/IR/GlobalValue.h"
22#include "llvm/IR/Intrinsics.h"
23#include "llvm/IR/IntrinsicsAArch64.h"
24#include "llvm/Support/Debug.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "aarch64-isel"
33#define PASS_NAME "AArch64 Instruction Selection"
34
35// https://github.com/llvm/llvm-project/issues/114425
36#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
37#pragma inline_depth(0)
38#endif
39
40//===--------------------------------------------------------------------===//
41/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
42/// instructions for SelectionDAG operations.
43///
44namespace {
45
46class AArch64DAGToDAGISel : public SelectionDAGISel {
47
48 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
49 /// make the right decision when generating code for different targets.
50 const AArch64Subtarget *Subtarget;
51
52public:
53 AArch64DAGToDAGISel() = delete;
54
55 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
56 CodeGenOptLevel OptLevel)
57 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
58
59 bool runOnMachineFunction(MachineFunction &MF) override {
60 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
62 }
63
64 void Select(SDNode *Node) override;
65 void PreprocessISelDAG() override;
66
67 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
68 /// inline asm expressions.
69 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
70 InlineAsm::ConstraintCode ConstraintID,
71 std::vector<SDValue> &OutOps) override;
72
73 template <signed Low, signed High, signed Scale>
74 bool SelectRDVLImm(SDValue N, SDValue &Imm);
75
76 template <signed Low, signed High>
77 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
78
79 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
81 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
83 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
84 return SelectShiftedRegister(N, false, Reg, Shift);
85 }
86 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
87 return SelectShiftedRegister(N, true, Reg, Shift);
88 }
89 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
90 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
91 }
92 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
93 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
94 }
95 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
96 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
97 }
98 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
99 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
100 }
101 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
103 }
104 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
105 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
106 }
107 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
108 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
109 }
110 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
111 return SelectAddrModeIndexed(N, 1, Base, OffImm);
112 }
113 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
114 return SelectAddrModeIndexed(N, 2, Base, OffImm);
115 }
116 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
117 return SelectAddrModeIndexed(N, 4, Base, OffImm);
118 }
119 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
120 return SelectAddrModeIndexed(N, 8, Base, OffImm);
121 }
122 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
123 return SelectAddrModeIndexed(N, 16, Base, OffImm);
124 }
125 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
126 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
127 }
128 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
129 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
130 }
131 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
132 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
133 }
134 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
135 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
136 }
137 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
138 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
139 }
140 template <unsigned Size, unsigned Max>
141 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
142 // Test if there is an appropriate addressing mode and check if the
143 // immediate fits.
144 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
145 if (Found) {
146 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
147 int64_t C = CI->getSExtValue();
148 if (C <= Max)
149 return true;
150 }
151 }
152
153 // Otherwise, base only, materialize address in register.
154 Base = N;
155 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
156 return true;
157 }
158
159 template<int Width>
160 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
161 SDValue &SignExtend, SDValue &DoShift) {
162 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163 }
164
165 template<int Width>
166 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
167 SDValue &SignExtend, SDValue &DoShift) {
168 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
169 }
170
171 bool SelectExtractHigh(SDValue N, SDValue &Res) {
172 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
173 N = N->getOperand(0);
174 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
175 !isa<ConstantSDNode>(N->getOperand(1)))
176 return false;
177 EVT VT = N->getValueType(0);
178 EVT LVT = N->getOperand(0).getValueType();
179 unsigned Index = N->getConstantOperandVal(1);
180 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
181 Index != VT.getVectorNumElements())
182 return false;
183 Res = N->getOperand(0);
184 return true;
185 }
186
187 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
188 if (N.getOpcode() != AArch64ISD::VLSHR)
189 return false;
190 SDValue Op = N->getOperand(0);
191 EVT VT = Op.getValueType();
192 unsigned ShtAmt = N->getConstantOperandVal(1);
193 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
194 return false;
195
196 APInt Imm;
197 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0)
200 << Op.getOperand(1).getConstantOperandVal(1));
201 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
202 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
203 Imm = APInt(VT.getScalarSizeInBits(),
204 Op.getOperand(1).getConstantOperandVal(0));
205 else
206 return false;
207
208 if (Imm != 1ULL << (ShtAmt - 1))
209 return false;
210
211 Res1 = Op.getOperand(0);
212 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
213 return true;
214 }
215
216 bool SelectDupZeroOrUndef(SDValue N) {
217 switch(N->getOpcode()) {
218 case ISD::UNDEF:
219 return true;
220 case AArch64ISD::DUP:
221 case ISD::SPLAT_VECTOR: {
222 auto Opnd0 = N->getOperand(0);
223 if (isNullConstant(Opnd0))
224 return true;
225 if (isNullFPConstant(Opnd0))
226 return true;
227 break;
228 }
229 default:
230 break;
231 }
232
233 return false;
234 }
235
236 bool SelectAny(SDValue) { return true; }
237
238 bool SelectDupZero(SDValue N) {
239 switch(N->getOpcode()) {
240 case AArch64ISD::DUP:
241 case ISD::SPLAT_VECTOR: {
242 auto Opnd0 = N->getOperand(0);
243 if (isNullConstant(Opnd0))
244 return true;
245 if (isNullFPConstant(Opnd0))
246 return true;
247 break;
248 }
249 }
250
251 return false;
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348 /// between 1 and 4 elements. If it contains a single element that is returned
349 /// unchanged; otherwise a REG_SEQUENCE value is returned.
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 void SelectPtrauthAuth(SDNode *N);
370 void SelectPtrauthResign(SDNode *N);
371
372 bool trySelectStackSlotTagP(SDNode *N);
373 void SelectTagP(SDNode *N);
374
375 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
378 unsigned SubRegIdx);
379 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
382 unsigned Opc_rr, unsigned Opc_ri,
383 bool IsIntr = false);
384 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
385 unsigned Scale, unsigned Opc_ri,
386 unsigned Opc_rr);
387 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
388 bool IsZmMulti, unsigned Opcode,
389 bool HasPred = false);
390 void SelectPExtPair(SDNode *N, unsigned Opc);
391 void SelectWhilePair(SDNode *N, unsigned Opc);
392 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
395 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
396 bool IsTupleInput, unsigned Opc);
397 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
398
399 template <unsigned MaxIdx, unsigned Scale>
400 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
401 unsigned Op);
402 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
403 unsigned Op, unsigned MaxIdx, unsigned Scale,
404 unsigned BaseReg = 0);
405 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
406 /// SVE Reg+Imm addressing mode.
407 template <int64_t Min, int64_t Max>
408 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
409 SDValue &OffImm);
410 /// SVE Reg+Reg address mode.
411 template <unsigned Scale>
412 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
413 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
414 }
415
416 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
417 unsigned Opc, uint32_t MaxImm);
418 void SelectMultiVectorLuti6LaneX4(SDNode *Node, unsigned NumIndexVecs);
419
420 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
421 unsigned NumInVecs);
422
423 template <unsigned MaxIdx, unsigned Scale>
424 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
425 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
426 }
427
428 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
430 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
431 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
432 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
433 unsigned Opc_rr, unsigned Opc_ri);
434 std::tuple<unsigned, SDValue, SDValue>
435 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
436 const SDValue &OldBase, const SDValue &OldOffset,
437 unsigned Scale);
438
439 bool tryBitfieldExtractOp(SDNode *N);
440 bool tryBitfieldExtractOpFromSExt(SDNode *N);
441 bool tryBitfieldInsertOp(SDNode *N);
442 bool tryBitfieldInsertInZeroOp(SDNode *N);
443 bool tryShiftAmountMod(SDNode *N);
444
445 bool tryReadRegister(SDNode *N);
446 bool tryWriteRegister(SDNode *N);
447
448 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
449 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
450
451 bool trySelectXAR(SDNode *N);
452
453 SDValue tryFoldCselToFMaxMin(SDNode &N);
454
455// Include the pieces autogenerated from the target description.
456#include "AArch64GenDAGISel.inc"
457
458private:
459 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
460 SDValue &Shift);
461 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
462 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm) {
464 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
465 }
466 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
467 unsigned Size, SDValue &Base,
468 SDValue &OffImm);
469 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
470 SDValue &OffImm);
471 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
472 SDValue &OffImm);
473 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
474 SDValue &Offset, SDValue &SignExtend,
475 SDValue &DoShift);
476 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
477 SDValue &Offset, SDValue &SignExtend,
478 SDValue &DoShift);
479 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
480 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
481 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
482 SDValue &Offset, SDValue &SignExtend);
483
484 template<unsigned RegWidth>
485 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
486 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
487 }
488 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
489
490 template <unsigned RegWidth>
491 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
492 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
493 }
494 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
495
496 template<unsigned RegWidth>
497 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
498 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
499 }
500
501 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
502 unsigned Width);
503
504 template <unsigned FloatWidth>
505 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
506 return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
507 }
508
509 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
510 unsigned Width);
511
512 bool SelectCMP_SWAP(SDNode *N);
513
514 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
515 bool Negate);
516 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
517 SDValue &Shift, bool Negate);
518 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
519 bool Negate);
520 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
521 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
522
523 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
524 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
525 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
526 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
527
528 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
529 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
530 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
531 bool AllowSaturation, SDValue &Imm);
532
533 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
534 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
535 SDValue &Offset);
536 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
537 SDValue &Offset, unsigned Scale = 1);
538
539 bool SelectAllActivePredicate(SDValue N);
540 bool SelectAnyPredicate(SDValue N);
541
542 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
543
544 template <bool MatchCBB>
545 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
546};
547
548class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
549public:
550 static char ID;
551 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
552 CodeGenOptLevel OptLevel)
554 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
555};
556} // end anonymous namespace
557
558char AArch64DAGToDAGISelLegacy::ID = 0;
559
560INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
561
564 std::make_unique<AArch64DAGToDAGISel>(TM, TM.getOptLevel())) {}
565
566/// addBitcastHints - This method adds bitcast hints to the operands of a node
567/// to help instruction selector determine which operands are in Neon registers.
569 SDLoc DL(&N);
570 auto getFloatVT = [&](EVT VT) {
571 EVT ScalarVT = VT.getScalarType();
572 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
573 return VT.changeElementType(*(DAG.getContext()),
574 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
575 };
577 NewOps.reserve(N.getNumOperands());
578
579 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
580 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
581 N.getOperand(I));
582 NewOps.push_back(bitcasted);
583 }
584 EVT OrigVT = N.getValueType(0);
585 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
586 return DAG.getBitcast(OrigVT, OpNode);
587}
588
589/// isIntImmediate - This method tests to see if the node is a constant
590/// operand. If so Imm will receive the 64-bit value.
591static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
593 Imm = C->getZExtValue();
594 return true;
595 }
596 return false;
597}
598
599// isIntImmediate - This method tests to see if a constant operand.
600// If so Imm will receive the value.
601static bool isIntImmediate(SDValue N, uint64_t &Imm) {
602 return isIntImmediate(N.getNode(), Imm);
603}
604
605// isOpcWithIntImmediate - This method tests to see if the node is a specific
606// opcode and that it has a immediate integer right operand.
607// If so Imm will receive the 32 bit value.
608static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
609 uint64_t &Imm) {
610 return N->getOpcode() == Opc &&
611 isIntImmediate(N->getOperand(1).getNode(), Imm);
612}
613
614// isIntImmediateEq - This method tests to see if N is a constant operand that
615// is equivalent to 'ImmExpected'.
616#ifndef NDEBUG
617static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
618 uint64_t Imm;
619 if (!isIntImmediate(N.getNode(), Imm))
620 return false;
621 return Imm == ImmExpected;
622}
623#endif
624
625static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
626 assert(RegWidth == 32 || RegWidth == 64);
627 if (RegWidth == 32)
628 return APInt(RegWidth,
630 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
631}
632
633// Decodes the raw integer splat value from a NEON splat operation.
634static std::optional<APInt> DecodeNEONSplat(SDValue N) {
635 assert(N.getValueType().isInteger() && "Only integers are supported");
636 if (N->getOpcode() == AArch64ISD::NVCAST)
637 N = N->getOperand(0);
638 unsigned SplatWidth = N.getScalarValueSizeInBits();
639 if (N.getOpcode() == AArch64ISD::FMOV)
640 return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
641 if (N->getOpcode() == AArch64ISD::MOVI)
642 return APInt(SplatWidth, N.getConstantOperandVal(0));
643 if (N->getOpcode() == AArch64ISD::MOVIshift)
644 return APInt(SplatWidth, N.getConstantOperandVal(0)
645 << N.getConstantOperandVal(1));
646 if (N->getOpcode() == AArch64ISD::MVNIshift)
647 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
648 << N.getConstantOperandVal(1));
649 if (N->getOpcode() == AArch64ISD::MOVIedit)
651 N.getConstantOperandVal(0)));
652 if (N->getOpcode() == AArch64ISD::DUP)
653 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
654 return Const->getAPIntValue().trunc(SplatWidth);
655 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
656 // in AArch64ISelLowering.
657 return std::nullopt;
658}
659
660// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
661// matching the element size of N.
662static std::optional<APInt> GetNEONSplatValue(SDValue N) {
663 unsigned SplatWidth = N.getScalarValueSizeInBits();
664 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
665 if (SplatVal->getBitWidth() <= SplatWidth)
666 return APInt::getSplat(SplatWidth, *SplatVal);
667 if (SplatVal->isSplat(SplatWidth))
668 return SplatVal->trunc(SplatWidth);
669 }
670 return std::nullopt;
671}
672
673bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
674 SDValue &Imm) {
675 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
676 if (!ImmVal)
677 return false;
678 uint64_t Encoding;
679 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
680 ImmVal->getZExtValue(), Encoding))
681 return false;
682
683 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
684 return true;
685}
686
687bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
688 SDValue &Shift) {
689 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
690 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
691 N.getValueType().getScalarType().getSimpleVT(),
692 Imm, Shift,
693 /*Negate=*/false);
694 return false;
695}
696
697bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
698 SDValue &Imm) {
699 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
700 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
701 return false;
702}
703
704bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
705 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
706 std::vector<SDValue> &OutOps) {
707 switch(ConstraintID) {
708 default:
709 llvm_unreachable("Unexpected asm memory constraint");
710 case InlineAsm::ConstraintCode::m:
711 case InlineAsm::ConstraintCode::o:
712 case InlineAsm::ConstraintCode::Q:
713 // We need to make sure that this one operand does not end up in XZR, thus
714 // require the address to be in a PointerRegClass register.
715 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
716 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
717 SDLoc dl(Op);
718 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
719 SDValue NewOp =
720 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
721 dl, Op.getValueType(),
722 Op, RC), 0);
723 OutOps.push_back(NewOp);
724 return false;
725 }
726 return true;
727}
728
729/// SelectArithImmed - Select an immediate value that can be represented as
730/// a 12-bit value shifted left by either 0 or 12. If so, return true with
731/// Val set to the 12-bit value and Shift set to the shifter operand.
732bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
733 SDValue &Shift) {
734 // This function is called from the addsub_shifted_imm ComplexPattern,
735 // which lists [imm] as the list of opcode it's interested in, however
736 // we still need to check whether the operand is actually an immediate
737 // here because the ComplexPattern opcode list is only used in
738 // root-level opcode matching.
739 if (!isa<ConstantSDNode>(N.getNode()))
740 return false;
741
742 uint64_t Immed = N.getNode()->getAsZExtVal();
743
745 return false;
746
747 unsigned ShiftAmt = AArch64_AM::getArithImmedShift(Immed);
748 Immed >>= ShiftAmt;
749
750 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
751 SDLoc dl(N);
752 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
753 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
754 return true;
755}
756
757/// SelectNegArithImmed - As above, but negates the value before trying to
758/// select it.
759bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
760 SDValue &Shift) {
761 // This function is called from the addsub_shifted_imm ComplexPattern,
762 // which lists [imm] as the list of opcode it's interested in, however
763 // we still need to check whether the operand is actually an immediate
764 // here because the ComplexPattern opcode list is only used in
765 // root-level opcode matching.
766 if (!isa<ConstantSDNode>(N.getNode()))
767 return false;
768
769 // The immediate operand must be a 24-bit zero-extended immediate.
770 uint64_t Immed = N.getNode()->getAsZExtVal();
771
772 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
773 // have the opposite effect on the C flag, so this pattern mustn't match under
774 // those circumstances.
775 if (Immed == 0)
776 return false;
777
778 if (N.getValueType() == MVT::i32)
779 Immed = ~((uint32_t)Immed) + 1;
780 else
781 Immed = ~Immed + 1ULL;
782 if (Immed & 0xFFFFFFFFFF000000ULL)
783 return false;
784
785 Immed &= 0xFFFFFFULL;
786 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
787 Shift);
788}
789
790/// getShiftTypeForNode - Translate a shift node to the corresponding
791/// ShiftType value.
793 switch (N.getOpcode()) {
794 default:
796 case ISD::SHL:
797 return AArch64_AM::LSL;
798 case ISD::SRL:
799 return AArch64_AM::LSR;
800 case ISD::SRA:
801 return AArch64_AM::ASR;
802 case ISD::ROTR:
803 return AArch64_AM::ROR;
804 }
805}
806
808 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
809}
810
811/// Determine whether it is worth it to fold SHL into the addressing
812/// mode.
814 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
815 // It is worth folding logical shift of up to three places.
816 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
817 if (!CSD)
818 return false;
819 unsigned ShiftVal = CSD->getZExtValue();
820 if (ShiftVal > 3)
821 return false;
822
823 // Check if this particular node is reused in any non-memory related
824 // operation. If yes, do not try to fold this node into the address
825 // computation, since the computation will be kept.
826 const SDNode *Node = V.getNode();
827 for (SDNode *UI : Node->users())
828 if (!isMemOpOrPrefetch(UI))
829 for (SDNode *UII : UI->users())
830 if (!isMemOpOrPrefetch(UII))
831 return false;
832 return true;
833}
834
835/// Determine whether it is worth to fold V into an extended register addressing
836/// mode.
837bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
838 // Trivial if we are optimizing for code size or if there is only
839 // one use of the value.
840 if (CurDAG->shouldOptForSize() || V.hasOneUse())
841 return true;
842
843 // If a subtarget has a slow shift, folding a shift into multiple loads
844 // costs additional micro-ops.
845 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
846 return false;
847
848 // Check whether we're going to emit the address arithmetic anyway because
849 // it's used by a non-address operation.
850 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
851 return true;
852 if (V.getOpcode() == ISD::ADD) {
853 const SDValue LHS = V.getOperand(0);
854 const SDValue RHS = V.getOperand(1);
855 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
856 return true;
857 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
858 return true;
859 }
860
861 // It hurts otherwise, since the value will be reused.
862 return false;
863}
864
865/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
866/// to select more shifted register
867bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
868 SDValue &Shift) {
869 EVT VT = N.getValueType();
870 if (VT != MVT::i32 && VT != MVT::i64)
871 return false;
872
873 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
874 return false;
875 SDValue LHS = N.getOperand(0);
876 if (!LHS->hasOneUse())
877 return false;
878
879 unsigned LHSOpcode = LHS->getOpcode();
880 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
881 return false;
882
883 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
884 if (!ShiftAmtNode)
885 return false;
886
887 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
888 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
889 if (!RHSC)
890 return false;
891
892 APInt AndMask = RHSC->getAPIntValue();
893 unsigned LowZBits, MaskLen;
894 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
895 return false;
896
897 unsigned BitWidth = N.getValueSizeInBits();
898 SDLoc DL(LHS);
899 uint64_t NewShiftC;
900 unsigned NewShiftOp;
901 if (LHSOpcode == ISD::SHL) {
902 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
903 // BitWidth != LowZBits + MaskLen doesn't match the pattern
904 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
905 return false;
906
907 NewShiftC = LowZBits - ShiftAmtC;
908 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
909 } else {
910 if (LowZBits == 0)
911 return false;
912
913 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
914 NewShiftC = LowZBits + ShiftAmtC;
915 if (NewShiftC >= BitWidth)
916 return false;
917
918 // SRA need all high bits
919 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
920 return false;
921
922 // SRL high bits can be 0 or 1
923 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
924 return false;
925
926 if (LHSOpcode == ISD::SRL)
927 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
928 else
929 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
930 }
931
932 assert(NewShiftC < BitWidth && "Invalid shift amount");
933 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
934 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
935 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
936 NewShiftAmt, BitWidthMinus1),
937 0);
938 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
939 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
940 return true;
941}
942
943/// getExtendTypeForNode - Translate an extend node to the corresponding
944/// ExtendType value.
///
/// Handles SIGN_EXTEND / SIGN_EXTEND_INREG, ZERO_EXTEND / ANY_EXTEND and AND
/// with a constant mask of 0xFF, 0xFFFF or 0xFFFFFFFF. When \p IsLoadStore is
/// true the 8- and 16-bit forms are not reported as SXTB/SXTH/UXTB/UXTH; only
/// the 32-bit (SXTW/UXTW) forms are.
// NOTE(review): this listing skips embedded lines 945, 963, 975, 979, 984 and
// 994, so the block below is not complete. Presumably they hold the return
// type and the fall-through `return AArch64_AM::InvalidShiftExtend;`
// statements (that value is what the 0xFF/0xFFFF cases return for load/store
// use) -- confirm against the real source before editing.
946getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
947 if (N.getOpcode() == ISD::SIGN_EXTEND ||
948 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
// For SIGN_EXTEND_INREG the source type is carried by operand 1 (a VTSDNode);
// for SIGN_EXTEND it is the type of operand 0.
949 EVT SrcVT;
950 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
951 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
952 else
953 SrcVT = N.getOperand(0).getValueType();
954
955 if (!IsLoadStore && SrcVT == MVT::i8)
956 return AArch64_AM::SXTB;
957 else if (!IsLoadStore && SrcVT == MVT::i16)
958 return AArch64_AM::SXTH;
959 else if (SrcVT == MVT::i32)
960 return AArch64_AM::SXTW;
961 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
962
// NOTE(review): embedded line 963 is missing here.
964 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
965 N.getOpcode() == ISD::ANY_EXTEND) {
966 EVT SrcVT = N.getOperand(0).getValueType();
967 if (!IsLoadStore && SrcVT == MVT::i8)
968 return AArch64_AM::UXTB;
969 else if (!IsLoadStore && SrcVT == MVT::i16)
970 return AArch64_AM::UXTH;
971 else if (SrcVT == MVT::i32)
972 return AArch64_AM::UXTW;
973 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
974
// NOTE(review): embedded line 975 is missing here.
976 } else if (N.getOpcode() == ISD::AND) {
// An AND with an all-ones low mask is treated as a zero extension.
977 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
978 if (!CSD)
// NOTE(review): embedded line 979 (the statement guarded by `if (!CSD)`) is
// missing here.
980 uint64_t AndMask = CSD->getZExtValue();
981
982 switch (AndMask) {
983 default:
// NOTE(review): embedded line 984 (the body of `default:`) is missing here.
985 case 0xFF:
986 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
987 case 0xFFFF:
988 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
989 case 0xFFFFFFFF:
990 return AArch64_AM::UXTW;
991 }
992 }
993
// NOTE(review): embedded line 994 (the final return) is missing here.
995}
996
997/// Determine whether it is worth to fold V into an extended register of an
998/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
999/// instruction, and the shift should be treated as worth folding even if has
1000/// multiple uses.
///
/// \returns true when optimizing for size, when \p V has a single use, or when
/// the fast-LSL condition below holds; false otherwise.
1001bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
1002 // Trivial if we are optimizing for code size or if there is only
1003 // one use of the value.
1004 if (CurDAG->shouldOptForSize() || V.hasOneUse())
1005 return true;
1006
1007 // If a subtarget has a fastpath LSL we can fold a logical shift into
1008 // the add/sub and save a cycle.
1009 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
1010 V.getConstantOperandVal(1) <= 4 &&
// NOTE(review): embedded line 1011 -- the last operand of this condition and
// its closing parenthesis -- is missing from the listing; do not edit this
// condition without checking the real source.
1012 return true;
1013
1014 // It hurts otherwise, since the value will be reused.
1015 return false;
1016}
1017
1018/// SelectShiftedRegister - Select a "shifted register" operand. If the value
1019/// is not shifted, set the Shift operand to default of "LSL 0". The logical
1020/// instructions allow the shifted register to be rotated, but the arithmetic
1021/// instructions do not. The AllowROR parameter specifies whether ROR is
1022/// supported.
///
/// On a match, \p Reg receives the shifted value's first operand and \p Shift
/// the encoded shifter immediate; the AND-based form is tried first through
/// SelectShiftedRegisterFromAnd.
1023bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1024 SDValue &Reg, SDValue &Shift) {
1025 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1026 return true;
1027
// NOTE(review): embedded line 1028 is missing from the listing; presumably it
// declares and initialises `ShType` from N -- confirm against the real source.
1029 if (ShType == AArch64_AM::InvalidShiftExtend)
1030 return false;
1031 if (!AllowROR && ShType == AArch64_AM::ROR)
1032 return false;
1033
1034 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// The shift amount is reduced modulo the value width before being encoded.
1035 unsigned BitSize = N.getValueSizeInBits();
1036 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1037 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1038
1039 Reg = N.getOperand(0);
1040 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
// Reg and Shift are written even when the profitability check says no.
1041 return isWorthFoldingALU(N, true);
1042 }
1043
1044 return false;
1045}
1046
1047/// Instructions that accept extend modifiers like UXTW expect the register
1048/// being extended to be a GPR32, but the incoming DAG might be acting on a
1049/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1050/// this is the case.
// NOTE(review): embedded line 1051 (the function signature) is missing from
// the listing. Callers in this file invoke it as `narrowIfNeeded(CurDAG, X)`
// and use the result as an SDValue -- confirm the exact declaration against
// the real source.
// An i32 value is returned unchanged; anything else is narrowed by extracting
// the sub_32 subregister as an i32.
1052 if (N.getValueType() == MVT::i32)
1053 return N;
1054
1055 SDLoc dl(N);
1056 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1057}
1058
1059// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1060template<signed Low, signed High, signed Scale>
1061bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1062 if (!isa<ConstantSDNode>(N))
1063 return false;
1064
1065 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1066 if ((MulImm % std::abs(Scale)) == 0) {
1067 int64_t RDVLImm = MulImm / Scale;
1068 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1069 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1070 return true;
1071 }
1072 }
1073
1074 return false;
1075}
1076
1077// Returns a suitable RDSVL multiplier from a left shift.
1078template <signed Low, signed High>
1079bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1080 if (!isa<ConstantSDNode>(N))
1081 return false;
1082
1083 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1084 if (MulImm >= Low && MulImm <= High) {
1085 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1086 return true;
1087 }
1088
1089 return false;
1090}
1091
1092/// SelectArithExtendedRegister - Select a "extended register" operand. This
1093/// operand folds in an extend followed by an optional left shift.
///
/// On a match, \p Reg is the (possibly narrowed) value being extended and
/// \p Shift the encoded arithmetic-extend immediate. A left shift of more than
/// 4 is rejected.
// NOTE(review): this listing skips embedded lines 1097, 1108, 1114, 1120 and
// 1136-1137, so the block below is incomplete and its braces do not balance
// as shown. Presumably 1097 declares `Ext`, 1108/1114 are the conditions
// guarding the `return false;` that follows them, and 1120 opens the scope
// closed at 1127 -- confirm against the real source before editing.
1094bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
1095 SDValue &Shift) {
1096 unsigned ShiftVal = 0;
1098
1099 if (N.getOpcode() == ISD::SHL) {
// Shifted form: the extend is the SHL's first operand.
1100 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1101 if (!CSD)
1102 return false;
1103 ShiftVal = CSD->getZExtValue();
1104 if (ShiftVal > 4)
1105 return false;
1106
1107 Ext = getExtendTypeForNode(N.getOperand(0));
// NOTE(review): embedded line 1108 is missing here.
1109 return false;
1110
1111 Reg = N.getOperand(0).getOperand(0);
1112 } else {
// Unshifted form: N itself is the extend.
1113 Ext = getExtendTypeForNode(N);
// NOTE(review): embedded line 1114 is missing here.
1115 return false;
1116
1117 // Don't match sext of vector extracts. These can use SMOV, but if we match
1118 // this as an extended register, we'll always fold the extend into an ALU op
1119 // user of the extend (which results in a UMOV).
// NOTE(review): embedded line 1120 is missing here.
1121 SDValue Op = N.getOperand(0);
1122 if (Op->getOpcode() == ISD::ANY_EXTEND)
1123 Op = Op->getOperand(0);
1124 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1125 Op.getOperand(0).getValueType().isFixedLengthVector())
1126 return false;
1127 }
1128
1129 Reg = N.getOperand(0);
1130
1131 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1132 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1133 auto isDef32 = [](SDValue N) {
1134 unsigned Opc = N.getOpcode();
1135 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
// NOTE(review): embedded lines 1136-1137 (more terms of this conjunction) are
// missing here.
1138 Opc != ISD::FREEZE;
1139 };
1140 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1141 isDef32(Reg))
1142 return false;
1143 }
1144
1145 // AArch64 mandates that the RHS of the operation must use the smallest
1146 // register class that could contain the size being extended from. Thus,
1147 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1148 // there might not be an actual 32-bit value in the program. We can
1149 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1150 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1151 Reg = narrowIfNeeded(CurDAG, Reg);
1152 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1153 MVT::i32);
1154 return isWorthFoldingALU(N);
1155}
1156
1157/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1158/// operand is referred by the instructions have SP operand
///
/// Only matches an SHL by a constant of at most 4. On a match, \p Reg is the
/// shifted value and \p Shift the arithmetic-extend immediate for UXTX with
/// that shift amount.
1159bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1160 SDValue &Shift) {
1161 unsigned ShiftVal = 0;
// NOTE(review): embedded line 1162 is missing from the listing; presumably it
// declares `Ext`, which is assigned below -- confirm against the real source.
1163
1164 if (N.getOpcode() != ISD::SHL)
1165 return false;
1166
1167 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1168 if (!CSD)
1169 return false;
1170 ShiftVal = CSD->getZExtValue();
1171 if (ShiftVal > 4)
1172 return false;
1173
1174 Ext = AArch64_AM::UXTX;
1175 Reg = N.getOperand(0);
1176 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1177 MVT::i32);
1178 return isWorthFoldingALU(N);
1179}
1180
1181/// If there's a use of this ADDlow that's not itself a load/store then we'll
1182/// need to create a real ADD instruction from it anyway and there's no point in
1183/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1184/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1185/// leads to duplicated ADRP instructions.
// NOTE(review): embedded line 1186 (the function signature) is missing from
// the listing. SelectAddrModeIndexed calls this as `isWorthFoldingADDlow(N)`
// and uses the result as a boolean -- confirm the exact declaration against
// the real source.
1187 for (auto *User : N->users()) {
// Every user must be a plain or atomic load/store.
1188 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1189 User->getOpcode() != ISD::ATOMIC_LOAD &&
1190 User->getOpcode() != ISD::ATOMIC_STORE)
1191 return false;
1192
1193 // ldar and stlr have much more restrictive addressing modes (just a
1194 // register).
1195 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1196 return false;
1197 }
1198
1199 return true;
1200}
1201
1202/// Check if the immediate offset is valid as a scaled immediate.
1203static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1204 unsigned Size) {
1205 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1206 Offset < (Range << Log2_32(Size)))
1207 return true;
1208 return false;
1209}
1210
1211/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1212/// immediate" address. The "Size" argument is the size in bytes of the memory
1213/// reference, which determines the scale.
1214bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1215 unsigned BW, unsigned Size,
1216 SDValue &Base,
1217 SDValue &OffImm) {
1218 SDLoc dl(N);
1219 const DataLayout &DL = CurDAG->getDataLayout();
1220 const TargetLowering *TLI = getTargetLowering();
1221 if (N.getOpcode() == ISD::FrameIndex) {
1222 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1223 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1224 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1225 return true;
1226 }
1227
1228 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1229 // selected here doesn't support labels/immediates, only base+offset.
1230 if (CurDAG->isBaseWithConstantOffset(N)) {
1231 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1232 if (IsSignedImm) {
1233 int64_t RHSC = RHS->getSExtValue();
1234 unsigned Scale = Log2_32(Size);
1235 int64_t Range = 0x1LL << (BW - 1);
1236
1237 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1238 RHSC < (Range << Scale)) {
1239 Base = N.getOperand(0);
1240 if (Base.getOpcode() == ISD::FrameIndex) {
1241 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1242 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1243 }
1244 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1245 return true;
1246 }
1247 } else {
1248 // unsigned Immediate
1249 uint64_t RHSC = RHS->getZExtValue();
1250 unsigned Scale = Log2_32(Size);
1251 uint64_t Range = 0x1ULL << BW;
1252
1253 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1254 Base = N.getOperand(0);
1255 if (Base.getOpcode() == ISD::FrameIndex) {
1256 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1257 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1258 }
1259 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1260 return true;
1261 }
1262 }
1263 }
1264 }
1265 // Base only. The address will be materialized into a register before
1266 // the memory is accessed.
1267 // add x0, Xbase, #offset
1268 // stp x1, x2, [x0]
1269 Base = N;
1270 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1271 return true;
1272}
1273
1274/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1275/// immediate" address. The "Size" argument is the size in bytes of the memory
1276/// reference, which determines the scale.
///
/// Returns false only when SelectAddrModeUnscaled can handle the address;
/// otherwise the whole address is used as the base with a zero offset.
1277bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1278 SDValue &Base, SDValue &OffImm) {
1279 SDLoc dl(N);
1280 const DataLayout &DL = CurDAG->getDataLayout();
1281 const TargetLowering *TLI = getTargetLowering();
// A bare frame index becomes [TargetFrameIndex, #0].
1282 if (N.getOpcode() == ISD::FrameIndex) {
1283 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1284 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1285 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1286 return true;
1287 }
1288
// ADDlow: use its two operands directly as base and offset when every user
// permits it (see isWorthFoldingADDlow).
1289 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1290 GlobalAddressSDNode *GAN =
1291 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1292 Base = N.getOperand(0);
1293 OffImm = N.getOperand(1);
1294 if (!GAN)
1295 return true;
1296
1297 if (GAN->getOffset() % Size == 0 &&
// NOTE(review): embedded line 1298 -- the second half of this condition -- is
// missing from the listing; do not edit it without checking the real source.
1299 return true;
1300 }
1301
1302 if (CurDAG->isBaseWithConstantOffset(N)) {
1303 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1304 int64_t RHSC = (int64_t)RHS->getZExtValue();
1305 unsigned Scale = Log2_32(Size);
// 0x1000 scaled values: the 12-bit unsigned immediate named in the header.
1306 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1307 Base = N.getOperand(0);
1308 if (Base.getOpcode() == ISD::FrameIndex) {
1309 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1310 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1311 }
1312 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1313 return true;
1314 }
1315 }
1316 }
1317
1318 // Before falling back to our general case, check if the unscaled
1319 // instructions can handle this. If so, that's preferable.
// Note that a successful unscaled match makes this selector fail on purpose.
1320 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1321 return false;
1322
1323 // Base only. The address will be materialized into a register before
1324 // the memory is accessed.
1325 // add x0, Xbase, #offset
1326 // ldr x0, [x0]
1327 Base = N;
1328 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1329 return true;
1330}
1331
1332/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1333/// immediate" address. This should only match when there is an offset that
1334/// is not valid for a scaled immediate addressing mode. The "Size" argument
1335/// is the size in bytes of the memory reference, which is needed here to know
1336/// what is valid for a scaled immediate.
1337bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1338 SDValue &Base,
1339 SDValue &OffImm) {
1340 if (!CurDAG->isBaseWithConstantOffset(N))
1341 return false;
1342 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1343 int64_t RHSC = RHS->getSExtValue();
1344 if (RHSC >= -256 && RHSC < 256) {
1345 Base = N.getOperand(0);
1346 if (Base.getOpcode() == ISD::FrameIndex) {
1347 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1348 const TargetLowering *TLI = getTargetLowering();
1349 Base = CurDAG->getTargetFrameIndex(
1350 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1351 }
1352 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1353 return true;
1354 }
1355 }
1356 return false;
1357}
1358
// NOTE(review): embedded line 1359 (the function signature) is missing from
// the listing, so this helper's name and parameter list are not visible here;
// the body uses `CurDAG` and `N` -- confirm the declaration against the real
// source.
// Widens N to i64 by inserting it as the sub_32 subregister of an
// IMPLICIT_DEF i64 value.
1360 SDLoc dl(N);
1361 SDValue ImpDef = SDValue(
1362 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1363 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1364 N);
1365}
1366
1367/// Check if the given SHL node (\p N), can be used to form an
1368/// extended register for an addressing mode.
///
/// With \p WantExtend set, the SHL's first operand must itself be an extend
/// recognised by getExtendTypeForNode in load/store mode. \p Offset receives
/// the register to use and \p SignExtend whether the extend is SXTW (always 0
/// without \p WantExtend). The shift amount must be 0 or log2(\p Size).
1369bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1370 bool WantExtend, SDValue &Offset,
1371 SDValue &SignExtend) {
1372 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1373 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
// Only constant shift amounts in the range 0..7 are considered.
1374 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1375 return false;
1376
1377 SDLoc dl(N);
1378 if (WantExtend) {
// NOTE(review): embedded lines 1379 and 1381 are missing from the listing;
// presumably 1379 declares `Ext` (initialised by the call below) and 1381 is
// the condition guarding the following `return false;` -- confirm against the
// real source.
1380 getExtendTypeForNode(N.getOperand(0), true);
1382 return false;
1383
1384 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1385 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1386 MVT::i32);
1387 } else {
1388 Offset = N.getOperand(0);
1389 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1390 }
1391
1392 unsigned LegalShiftVal = Log2_32(Size);
1393 unsigned ShiftVal = CSD->getZExtValue();
1394
// Offset and SignExtend have already been written when this check rejects.
1395 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1396 return false;
1397
1398 return isWorthFoldingAddr(N, Size);
1399}
1400
// Select a "base register plus extended register offset" address from an
// ISD::ADD. Tries, in order: a shifted extend on the RHS, a shifted extend on
// the LHS, an unshifted extend on the LHS, an unshifted extend on the RHS.
// SignExtend is set to whether the extend is SXTW and DoShift to whether a
// shift was folded. Fails if N is not an ADD or has a user that is not a
// memory operation or prefetch.
// NOTE(review): this listing skips embedded lines 1402, 1413, 1447, 1451 and
// 1463. Presumably 1402 holds the `Base`/`Offset` output parameters, 1413 the
// condition for the immediate-add early exit, 1447 the declaration of `Ext`,
// and 1451/1463 the right-hand sides of the two `!=` comparisons -- confirm
// against the real source before editing.
1401bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1403 SDValue &SignExtend,
1404 SDValue &DoShift) {
1405 if (N.getOpcode() != ISD::ADD)
1406 return false;
1407 SDValue LHS = N.getOperand(0);
1408 SDValue RHS = N.getOperand(1);
1409 SDLoc dl(N);
1410
1411 // We don't want to match immediate adds here, because they are better lowered
1412 // to the register-immediate addressing modes.
// NOTE(review): embedded line 1413 is missing here.
1414 return false;
1415
1416 // Check if this particular node is reused in any non-memory related
1417 // operation. If yes, do not try to fold this node into the address
1418 // computation, since the computation will be kept.
1419 const SDNode *Node = N.getNode();
1420 for (SDNode *UI : Node->users()) {
1421 if (!isMemOpOrPrefetch(UI))
1422 return false;
1423 }
1424
1425 // Remember if it is worth folding N when it produces extended register.
1426 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1427
1428 // Try to match a shifted extend on the RHS.
1429 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1430 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1431 Base = LHS;
1432 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1433 return true;
1434 }
1435
1436 // Try to match a shifted extend on the LHS.
1437 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1438 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1439 Base = RHS;
1440 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1441 return true;
1442 }
1443
1444 // There was no shift, whatever else we find.
1445 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1446
// NOTE(review): embedded line 1447 is missing here.
1448 // Try to match an unshifted extend on the LHS.
1449 if (IsExtendedRegisterWorthFolding &&
1450 (Ext = getExtendTypeForNode(LHS, true)) !=
// NOTE(review): embedded line 1451 is missing here.
1452 Base = RHS;
1453 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1454 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1455 MVT::i32);
// The outputs are already written; only the return depends on this check.
1456 if (isWorthFoldingAddr(LHS, Size))
1457 return true;
1458 }
1459
1460 // Try to match an unshifted extend on the RHS.
1461 if (IsExtendedRegisterWorthFolding &&
1462 (Ext = getExtendTypeForNode(RHS, true)) !=
// NOTE(review): embedded line 1463 is missing here.
1464 Base = LHS;
1465 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1466 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1467 MVT::i32);
1468 if (isWorthFoldingAddr(RHS, Size))
1469 return true;
1470 }
1471
1472 return false;
1473}
1474
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
//
// Concretely: values in [0x0, 0xfff] are preferred; values whose set bits all
// lie in bits [12, 23] are preferred only when they have set bits in both
// [12, 15] and [16, 23], i.e. when they straddle two 16-bit MOVZ chunks.
// Everything else (including all negative values) is not preferred.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
1489
// Select a "base register plus 64-bit register offset" address from an
// ISD::ADD, optionally folding a left shift of one operand. Fails if N is not
// an ADD, if any user of N is not a memory operation or prefetch, or if the
// RHS is a constant that is better handled as an immediate. SignExtend is
// always reported as false by the unshifted fallback; the shifted forms get it
// from SelectExtendedSHL called with WantExtend == false.
// NOTE(review): embedded line 1491 is missing from the listing; presumably it
// holds the `Base`/`Offset` output parameters assigned below -- confirm
// against the real source.
1490bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1492 SDValue &SignExtend,
1493 SDValue &DoShift) {
1494 if (N.getOpcode() != ISD::ADD)
1495 return false;
1496 SDValue LHS = N.getOperand(0);
1497 SDValue RHS = N.getOperand(1);
1498 SDLoc DL(N);
1499
1500 // Check if this particular node is reused in any non-memory related
1501 // operation. If yes, do not try to fold this node into the address
1502 // computation, since the computation will be kept.
1503 const SDNode *Node = N.getNode();
1504 for (SDNode *UI : Node->users()) {
1505 if (!isMemOpOrPrefetch(UI))
1506 return false;
1507 }
1508
1509 // Watch out if RHS is a wide immediate, it can not be selected into
1510 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1511 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1512 // instructions like:
1513 // MOV X0, WideImmediate
1514 // ADD X1, BaseReg, X0
1515 // LDR X2, [X1, 0]
1516 // For such situation, using [BaseReg, XReg] addressing mode can save one
1517 // ADD/SUB:
1518 // MOV X0, WideImmediate
1519 // LDR X2, [BaseReg, X0]
1520 if (isa<ConstantSDNode>(RHS)) {
1521 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1522 // Skip the immediate can be selected by load/store addressing mode.
1523 // Also skip the immediate can be encoded by a single ADD (SUB is also
1524 // checked by using -ImmOff).
1525 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1526 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1527 return false;
1528
// Materialise the wide constant with MOVi64imm and rebuild N as a
// register + register ADD.
1529 SDValue Ops[] = { RHS };
1530 SDNode *MOVI =
1531 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1532 SDValue MOVIV = SDValue(MOVI, 0);
1533 // This ADD of two X register will be selected into [Reg+Reg] mode.
1534 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1535 }
1536
1537 // Remember if it is worth folding N when it produces extended register.
1538 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1539
1540 // Try to match a shifted extend on the RHS.
1541 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1542 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1543 Base = LHS;
1544 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1545 return true;
1546 }
1547
1548 // Try to match a shifted extend on the LHS.
1549 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1550 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1551 Base = RHS;
1552 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1553 return true;
1554 }
1555
1556 // Match any non-shifted, non-extend, non-immediate add expression.
1557 Base = LHS;
1558 Offset = RHS;
1559 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1560 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1561 // Reg1 + Reg2 is free: no check needed.
1562 return true;
1563}
1564
1565SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1566 static const unsigned RegClassIDs[] = {
1567 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1568 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1569 AArch64::dsub2, AArch64::dsub3};
1570
1571 return createTuple(Regs, RegClassIDs, SubRegs);
1572}
1573
1574SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1575 static const unsigned RegClassIDs[] = {
1576 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1577 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1578 AArch64::qsub2, AArch64::qsub3};
1579
1580 return createTuple(Regs, RegClassIDs, SubRegs);
1581}
1582
1583SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1584 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1585 AArch64::ZPR3RegClassID,
1586 AArch64::ZPR4RegClassID};
1587 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1588 AArch64::zsub2, AArch64::zsub3};
1589
1590 return createTuple(Regs, RegClassIDs, SubRegs);
1591}
1592
1593SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1594 assert(Regs.size() == 2 || Regs.size() == 4);
1595
1596 // The createTuple interface requires 3 RegClassIDs for each possible
1597 // tuple type even though we only have them for ZPR2 and ZPR4.
1598 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1599 AArch64::ZPR4Mul4RegClassID};
1600 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1601 AArch64::zsub2, AArch64::zsub3};
1602 return createTuple(Regs, RegClassIDs, SubRegs);
1603}
1604
// Build a REG_SEQUENCE machine node that groups Regs into one register tuple.
// RegClassIDs is indexed by Regs.size() - 2 (so it must cover sizes 2..4) and
// SubRegs by element position. A single register is returned unchanged.
1605SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1606 const unsigned RegClassIDs[],
1607 const unsigned SubRegs[]) {
1608 // There's no special register-class for a vector-list of 1 element: it's just
1609 // a vector.
1610 if (Regs.size() == 1)
1611 return Regs[0];
1612
1613 assert(Regs.size() >= 2 && Regs.size() <= 4);
1614
1615 SDLoc DL(Regs[0]);
1616
// NOTE(review): embedded line 1617 is missing from the listing; presumably it
// declares the `Ops` operand list filled below -- confirm against the real
// source.
1618
1619 // First operand of REG_SEQUENCE is the desired RegClass.
1620 Ops.push_back(
1621 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1622
1623 // Then we get pairs of source & subregister-position for the components.
1624 for (unsigned i = 0; i < Regs.size(); ++i) {
1625 Ops.push_back(Regs[i]);
1626 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1627 }
1628
1629 SDNode *N =
1630 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1631 return SDValue(N, 0);
1632}
1633
// Replace N with machine node Opc whose table operand is a Q-register tuple
// built from NumVecs consecutive operands of N. When isExt is set, N's
// operand 1 is passed through first and the vector operands start one
// position later. The operand following the vectors is passed last.
1634void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1635 bool isExt) {
1636 SDLoc dl(N);
1637 EVT VT = N->getValueType(0);
1638
// 1 when isExt, otherwise 0: the number of extra leading operands.
1639 unsigned ExtOff = isExt;
1640
1641 // Form a REG_SEQUENCE to force register allocation.
1642 unsigned Vec0Off = ExtOff + 1;
1643 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1644 SDValue RegSeq = createQTuple(Regs);
1645
// NOTE(review): embedded line 1646 is missing from the listing; presumably it
// declares the `Ops` operand list filled below -- confirm against the real
// source.
1647 if (isExt)
1648 Ops.push_back(N->getOperand(1));
1649 Ops.push_back(RegSeq);
1650 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1651 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1652}
1653
// Split a ptrauth discriminator into (constant discriminator, address
// discriminator). If the constant part is not a constant that fits in 16
// bits, returns (target constant 0, Disc) so the discriminator is computed
// separately. If there is no address part, XZR is used for it.
// NOTE(review): embedded line 1655 (the function name and parameter list) is
// missing from the listing. Callers in this file invoke it as
// `extractPtrauthBlendDiscriminators(Disc, CurDAG)` -- confirm the exact
// declaration against the real source.
1654static std::tuple<SDValue, SDValue>
1656 SDLoc DL(Disc);
1657 SDValue AddrDisc;
1658 SDValue ConstDisc;
1659
1660 // If this is a blend, remember the constant and address discriminators.
1661 // Otherwise, it's either a constant discriminator, or a non-blended
1662 // address discriminator.
1663 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1664 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1665 AddrDisc = Disc->getOperand(1);
1666 ConstDisc = Disc->getOperand(2);
1667 } else {
1668 ConstDisc = Disc;
1669 }
1670
1671 // If the constant discriminator (either the blend RHS, or the entire
1672 // discriminator value) isn't a 16-bit constant, bail out, and let the
1673 // discriminator be computed separately.
1674 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1675 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1676 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1677
1678 // If there's no address discriminator, use XZR directly.
1679 if (!AddrDisc)
1680 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1681
1682 return std::make_tuple(
1683 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1684 AddrDisc);
1685}
1686
1687void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1688 SDLoc DL(N);
1689 // IntrinsicID is operand #0
1690 SDValue Val = N->getOperand(1);
1691 SDValue AUTKey = N->getOperand(2);
1692 SDValue AUTDisc = N->getOperand(3);
1693
1694 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1695 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1696
1697 SDValue AUTAddrDisc, AUTConstDisc;
1698 std::tie(AUTConstDisc, AUTAddrDisc) =
1699 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1700
1701 if (!Subtarget->isX16X17Safer()) {
1702 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1703 // Copy deactivation symbol if present.
1704 if (N->getNumOperands() > 4)
1705 Ops.push_back(N->getOperand(4));
1706
1707 SDNode *AUT =
1708 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1709 ReplaceNode(N, AUT);
1710 } else {
1711 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1712 AArch64::X16, Val, SDValue());
1713 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1714
1715 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1716 ReplaceNode(N, AUT);
1717 }
1718}
1719
1720void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1721 SDLoc DL(N);
1722 // IntrinsicID is operand #0, if W_CHAIN it is #1
1723 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1724 SDValue Val = N->getOperand(OffsetBase + 1);
1725 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1726 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1727 SDValue PACKey = N->getOperand(OffsetBase + 4);
1728 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1729 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1730 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1731
1732 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1733 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1734
1735 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1736 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1737
1738 SDValue AUTAddrDisc, AUTConstDisc;
1739 std::tie(AUTConstDisc, AUTAddrDisc) =
1740 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1741
1742 SDValue PACAddrDisc, PACConstDisc;
1743 std::tie(PACConstDisc, PACAddrDisc) =
1744 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1745
1746 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1747 AArch64::X16, Val, SDValue());
1748
1749 if (HasLoad) {
1750 SDValue Addend = N->getOperand(OffsetBase + 6);
1751 SDValue IncomingChain = N->getOperand(0);
1752 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1753 PACKey, PACConstDisc, PACAddrDisc,
1754 Addend, IncomingChain, X16Copy.getValue(1)};
1755
1756 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1757 MVT::i64, MVT::Other, Ops);
1758 ReplaceNode(N, AUTRELLOADPAC);
1759 } else {
1760 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1761 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1762
1763 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1764 ReplaceNode(N, AUTPAC);
1765 }
1766}
1767
1768bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1769 LoadSDNode *LD = cast<LoadSDNode>(N);
1770 if (LD->isUnindexed())
1771 return false;
1772 EVT VT = LD->getMemoryVT();
1773 EVT DstVT = N->getValueType(0);
1774 ISD::MemIndexedMode AM = LD->getAddressingMode();
1775 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1776 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1777 int OffsetVal = (int)OffsetOp->getZExtValue();
1778
1779 // We're not doing validity checking here. That was done when checking
1780 // if we should mark the load as indexed or not. We're just selecting
1781 // the right instruction.
1782 unsigned Opcode = 0;
1783
1784 ISD::LoadExtType ExtType = LD->getExtensionType();
1785 bool InsertTo64 = false;
1786 if (VT == MVT::i64)
1787 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1788 else if (VT == MVT::i32) {
1789 if (ExtType == ISD::NON_EXTLOAD)
1790 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1791 else if (ExtType == ISD::SEXTLOAD)
1792 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1793 else {
1794 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1795 InsertTo64 = true;
1796 // The result of the load is only i32. It's the subreg_to_reg that makes
1797 // it into an i64.
1798 DstVT = MVT::i32;
1799 }
1800 } else if (VT == MVT::i16) {
1801 if (ExtType == ISD::SEXTLOAD) {
1802 if (DstVT == MVT::i64)
1803 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1804 else
1805 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1806 } else {
1807 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1808 InsertTo64 = DstVT == MVT::i64;
1809 // The result of the load is only i32. It's the subreg_to_reg that makes
1810 // it into an i64.
1811 DstVT = MVT::i32;
1812 }
1813 } else if (VT == MVT::i8) {
1814 if (ExtType == ISD::SEXTLOAD) {
1815 if (DstVT == MVT::i64)
1816 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1817 else
1818 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1819 } else {
1820 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1821 InsertTo64 = DstVT == MVT::i64;
1822 // The result of the load is only i32. It's the subreg_to_reg that makes
1823 // it into an i64.
1824 DstVT = MVT::i32;
1825 }
1826 } else if (VT == MVT::f16) {
1827 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1828 } else if (VT == MVT::bf16) {
1829 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1830 } else if (VT == MVT::f32) {
1831 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1832 } else if (VT == MVT::f64 ||
1833 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1834 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1835 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1836 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1837 } else if (VT.is64BitVector()) {
1838 if (IsPre || OffsetVal != 8)
1839 return false;
1840 switch (VT.getScalarSizeInBits()) {
1841 case 8:
1842 Opcode = AArch64::LD1Onev8b_POST;
1843 break;
1844 case 16:
1845 Opcode = AArch64::LD1Onev4h_POST;
1846 break;
1847 case 32:
1848 Opcode = AArch64::LD1Onev2s_POST;
1849 break;
1850 case 64:
1851 Opcode = AArch64::LD1Onev1d_POST;
1852 break;
1853 default:
1854 llvm_unreachable("Expected vector element to be a power of 2");
1855 }
1856 } else if (VT.is128BitVector()) {
1857 if (IsPre || OffsetVal != 16)
1858 return false;
1859 switch (VT.getScalarSizeInBits()) {
1860 case 8:
1861 Opcode = AArch64::LD1Onev16b_POST;
1862 break;
1863 case 16:
1864 Opcode = AArch64::LD1Onev8h_POST;
1865 break;
1866 case 32:
1867 Opcode = AArch64::LD1Onev4s_POST;
1868 break;
1869 case 64:
1870 Opcode = AArch64::LD1Onev2d_POST;
1871 break;
1872 default:
1873 llvm_unreachable("Expected vector element to be a power of 2");
1874 }
1875 } else
1876 return false;
1877 SDValue Chain = LD->getChain();
1878 SDValue Base = LD->getBasePtr();
1879 SDLoc dl(N);
1880 // LD1 encodes an immediate offset by using XZR as the offset register.
1881 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1882 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1883 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1884 SDValue Ops[] = { Base, Offset, Chain };
1885 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1886 MVT::Other, Ops);
1887
1888 // Transfer memoperands.
1889 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1890 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1891
1892 // Either way, we're replacing the node, so tell the caller that.
1893 SDValue LoadedVal = SDValue(Res, 1);
1894 if (InsertTo64) {
1895 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1896 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1897 MVT::i64, LoadedVal, SubReg),
1898 0);
1899 }
1900
1901 ReplaceUses(SDValue(N, 0), LoadedVal);
1902 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1903 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1904 CurDAG->RemoveDeadNode(N);
1905 return true;
1906}
1907
1908void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1909 unsigned SubRegIdx) {
1910 SDLoc dl(N);
1911 EVT VT = N->getValueType(0);
1912 SDValue Chain = N->getOperand(0);
1913
1914 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1915 Chain};
1916
1917 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1918
1919 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1920 SDValue SuperReg = SDValue(Ld, 0);
1921 for (unsigned i = 0; i < NumVecs; ++i)
1922 ReplaceUses(SDValue(N, i),
1923 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1924
1925 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1926
1927 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1928 // because it's too simple to have needed special treatment during lowering.
1929 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1930 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1931 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1932 }
1933
1934 CurDAG->RemoveDeadNode(N);
1935}
1936
1937void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1938 unsigned Opc, unsigned SubRegIdx) {
1939 SDLoc dl(N);
1940 EVT VT = N->getValueType(0);
1941 SDValue Chain = N->getOperand(0);
1942
1943 SDValue Ops[] = {N->getOperand(1), // Mem operand
1944 N->getOperand(2), // Incremental
1945 Chain};
1946
1947 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1948 MVT::Untyped, MVT::Other};
1949
1950 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1951
1952 // Update uses of write back register
1953 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1954
1955 // Update uses of vector list
1956 SDValue SuperReg = SDValue(Ld, 1);
1957 if (NumVecs == 1)
1958 ReplaceUses(SDValue(N, 0), SuperReg);
1959 else
1960 for (unsigned i = 0; i < NumVecs; ++i)
1961 ReplaceUses(SDValue(N, i),
1962 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1963
1964 // Transfer memoperands.
1965 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1966 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1967
1968 // Update the chain
1969 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1970 CurDAG->RemoveDeadNode(N);
1971}
1972
1973/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1974/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1975/// new Base and an SDValue representing the new offset.
1976std::tuple<unsigned, SDValue, SDValue>
1977AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1978 unsigned Opc_ri,
1979 const SDValue &OldBase,
1980 const SDValue &OldOffset,
1981 unsigned Scale) {
1982 SDValue NewBase = OldBase;
1983 SDValue NewOffset = OldOffset;
1984 // Detect a possible Reg+Imm addressing mode.
1985 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1986 N, OldBase, NewBase, NewOffset);
1987
1988 // Detect a possible reg+reg addressing mode, but only if we haven't already
1989 // detected a Reg+Imm one.
1990 const bool IsRegReg =
1991 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1992
1993 // Select the instruction.
1994 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1995}
1996
1997enum class SelectTypeKind {
1998 Int1 = 0,
1999 Int = 1,
2000 FP = 2,
2002};
2003
2004/// This function selects an opcode from a list of opcodes, which is
2005/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
2006/// element types, in this order.
2007template <SelectTypeKind Kind>
2008static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
2009 // Only match scalable vector VTs
2010 if (!VT.isScalableVector())
2011 return 0;
2012
2013 EVT EltVT = VT.getVectorElementType();
2014 unsigned Key = VT.getVectorMinNumElements();
2015 switch (Kind) {
2017 break;
2019 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2020 EltVT != MVT::i64)
2021 return 0;
2022 break;
2024 if (EltVT != MVT::i1)
2025 return 0;
2026 break;
2027 case SelectTypeKind::FP:
2028 if (EltVT == MVT::bf16)
2029 Key = 16;
2030 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2031 EltVT != MVT::f64)
2032 return 0;
2033 break;
2034 }
2035
2036 unsigned Offset;
2037 switch (Key) {
2038 case 16: // 8-bit or bf16
2039 Offset = 0;
2040 break;
2041 case 8: // 16-bit
2042 Offset = 1;
2043 break;
2044 case 4: // 32-bit
2045 Offset = 2;
2046 break;
2047 case 2: // 64-bit
2048 Offset = 3;
2049 break;
2050 default:
2051 return 0;
2052 }
2053
2054 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2055}
2056
2057// This function is almost identical to SelectWhilePair, but has an
2058// extra check on the range of the immediate operand.
2059// TODO: Merge these two functions together at some point?
2060void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2061 // Immediate can be either 0 or 1.
2062 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2063 if (Imm->getZExtValue() > 1)
2064 return;
2065
2066 SDLoc DL(N);
2067 EVT VT = N->getValueType(0);
2068 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2069 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2070 SDValue SuperReg = SDValue(WhilePair, 0);
2071
2072 for (unsigned I = 0; I < 2; ++I)
2073 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2074 AArch64::psub0 + I, DL, VT, SuperReg));
2075
2076 CurDAG->RemoveDeadNode(N);
2077}
2078
2079void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2080 SDLoc DL(N);
2081 EVT VT = N->getValueType(0);
2082
2083 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2084
2085 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2086 SDValue SuperReg = SDValue(WhilePair, 0);
2087
2088 for (unsigned I = 0; I < 2; ++I)
2089 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2090 AArch64::psub0 + I, DL, VT, SuperReg));
2091
2092 CurDAG->RemoveDeadNode(N);
2093}
2094
2095void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2096 unsigned Opcode) {
2097 EVT VT = N->getValueType(0);
2098 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2099 SDValue Ops = createZTuple(Regs);
2100 SDLoc DL(N);
2101 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2102 SDValue SuperReg = SDValue(Intrinsic, 0);
2103 for (unsigned i = 0; i < NumVecs; ++i)
2104 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2105 AArch64::zsub0 + i, DL, VT, SuperReg));
2106
2107 CurDAG->RemoveDeadNode(N);
2108}
2109
2110void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2111 unsigned Opcode) {
2112 SDLoc DL(N);
2113 EVT VT = N->getValueType(0);
2114 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2115 Ops.push_back(/*Chain*/ N->getOperand(0));
2116
2117 SDNode *Instruction =
2118 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2119 SDValue SuperReg = SDValue(Instruction, 0);
2120
2121 for (unsigned i = 0; i < NumVecs; ++i)
2122 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2123 AArch64::zsub0 + i, DL, VT, SuperReg));
2124
2125 // Copy chain
2126 unsigned ChainIdx = NumVecs;
2127 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2128 CurDAG->RemoveDeadNode(N);
2129}
2130
2131void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2132 unsigned NumVecs,
2133 bool IsZmMulti,
2134 unsigned Opcode,
2135 bool HasPred) {
2136 assert(Opcode != 0 && "Unexpected opcode");
2137
2138 SDLoc DL(N);
2139 EVT VT = N->getValueType(0);
2140 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2142
2143 auto GetMultiVecOperand = [&]() {
2144 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2145 OpsIter += NumVecs;
2146 return createZMulTuple(Regs);
2147 };
2148
2149 if (HasPred)
2150 Ops.push_back(*OpsIter++);
2151
2152 Ops.push_back(GetMultiVecOperand());
2153 if (IsZmMulti)
2154 Ops.push_back(GetMultiVecOperand());
2155 else
2156 Ops.push_back(*OpsIter++);
2157
2158 // Append any remaining operands.
2159 Ops.append(OpsIter, N->op_end());
2160 SDNode *Intrinsic;
2161 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2162 SDValue SuperReg = SDValue(Intrinsic, 0);
2163 for (unsigned i = 0; i < NumVecs; ++i)
2164 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2165 AArch64::zsub0 + i, DL, VT, SuperReg));
2166
2167 CurDAG->RemoveDeadNode(N);
2168}
2169
2170void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2171 unsigned Scale, unsigned Opc_ri,
2172 unsigned Opc_rr, bool IsIntr) {
2173 assert(Scale < 5 && "Invalid scaling value.");
2174 SDLoc DL(N);
2175 EVT VT = N->getValueType(0);
2176 SDValue Chain = N->getOperand(0);
2177
2178 // Optimize addressing mode.
2180 unsigned Opc;
2181 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2182 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2183 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2184
2185 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2186 Base, // Memory operand
2187 Offset, Chain};
2188
2189 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2190
2191 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2192 SDValue SuperReg = SDValue(Load, 0);
2193 for (unsigned i = 0; i < NumVecs; ++i)
2194 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2195 AArch64::zsub0 + i, DL, VT, SuperReg));
2196
2197 // Copy chain
2198 unsigned ChainIdx = NumVecs;
2199 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2200 CurDAG->RemoveDeadNode(N);
2201}
2202
2203void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2204 unsigned NumVecs,
2205 unsigned Scale,
2206 unsigned Opc_ri,
2207 unsigned Opc_rr) {
2208 assert(Scale < 4 && "Invalid scaling value.");
2209 SDLoc DL(N);
2210 EVT VT = N->getValueType(0);
2211 SDValue Chain = N->getOperand(0);
2212
2213 SDValue PNg = N->getOperand(2);
2214 SDValue Base = N->getOperand(3);
2215 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2216 unsigned Opc;
2217 std::tie(Opc, Base, Offset) =
2218 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2219
2220 SDValue Ops[] = {PNg, // Predicate-as-counter
2221 Base, // Memory operand
2222 Offset, Chain};
2223
2224 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2225
2226 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2227 SDValue SuperReg = SDValue(Load, 0);
2228 for (unsigned i = 0; i < NumVecs; ++i)
2229 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2230 AArch64::zsub0 + i, DL, VT, SuperReg));
2231
2232 // Copy chain
2233 unsigned ChainIdx = NumVecs;
2234 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2235 CurDAG->RemoveDeadNode(N);
2236}
2237
2238void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2239 unsigned Opcode) {
2240 if (N->getValueType(0) != MVT::nxv4f32)
2241 return;
2242 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2243}
2244
2245void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2246 unsigned NumOutVecs,
2247 unsigned Opc,
2248 uint32_t MaxImm) {
2249 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2250 if (Imm->getZExtValue() > MaxImm)
2251 return;
2252
2253 SDValue ZtValue;
2254 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2255 return;
2256
2257 SDValue Chain = Node->getOperand(0);
2258 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2259 SDLoc DL(Node);
2260 EVT VT = Node->getValueType(0);
2261
2262 SDNode *Instruction =
2263 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2264 SDValue SuperReg = SDValue(Instruction, 0);
2265
2266 for (unsigned I = 0; I < NumOutVecs; ++I)
2267 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2268 AArch64::zsub0 + I, DL, VT, SuperReg));
2269
2270 // Copy chain
2271 unsigned ChainIdx = NumOutVecs;
2272 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2273 CurDAG->RemoveDeadNode(Node);
2274}
2275
2276void AArch64DAGToDAGISel::SelectMultiVectorLuti6LaneX4(SDNode *Node,
2277 unsigned NumIndexVecs) {
2278 assert((NumIndexVecs == 2 || NumIndexVecs == 3) &&
2279 "unexpected number of index vectors");
2280
2281 constexpr unsigned FirstIndexOp = 3;
2282 unsigned ImmOp = FirstIndexOp + NumIndexVecs;
2283 auto *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(ImmOp));
2284 if (!Imm || Imm->getZExtValue() > 1)
2285 return;
2286
2287 // The luti6 instruction always takes a 2-register Zm index tuple. The x3
2288 // ACLE form provides three index vectors, so the lane selects which adjacent
2289 // pair to use before forming Zm (op 3/4 or op 4/5, with op6 as imm)
2290 unsigned Lane = Imm->getZExtValue();
2291 unsigned IndexOp = FirstIndexOp;
2292 if (NumIndexVecs == 3)
2293 IndexOp += Lane;
2294
2295 SDValue TableTuple = createZTuple({Node->getOperand(1), Node->getOperand(2)});
2296 SDValue IndexTuple =
2297 createZTuple({Node->getOperand(IndexOp), Node->getOperand(IndexOp + 1)});
2298 SDValue Ops[] = {TableTuple, IndexTuple, Node->getOperand(ImmOp)};
2299
2300 SDLoc DL(Node);
2301 EVT VT = Node->getValueType(0);
2302 SDNode *Instruction =
2303 CurDAG->getMachineNode(AArch64::LUTI6_4Z2Z2ZI, DL, MVT::Untyped, Ops);
2304 SDValue SuperReg = SDValue(Instruction, 0);
2305
2306 for (unsigned I = 0; I < 4; ++I)
2307 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2308 AArch64::zsub0 + I, DL, VT, SuperReg));
2309
2310 CurDAG->RemoveDeadNode(Node);
2311}
2312
2313void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2314 unsigned NumOutVecs,
2315 unsigned Opc,
2316 unsigned NumInVecs) {
2317 assert((NumInVecs == 2 || NumInVecs == 3) &&
2318 "unexpected number of input vectors");
2319
2320 SDValue ZtValue;
2321 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2322 return;
2323
2324 SmallVector<SDValue, 4> Regs(Node->ops().slice(3, NumInVecs));
2325 SDValue ZTuple = NumInVecs == 3 ? createZTuple(Regs) : createZMulTuple(Regs);
2326 SDValue Ops[] = {ZtValue, ZTuple, Node->getOperand(0)};
2327
2328 SDLoc DL(Node);
2329 EVT VT = Node->getValueType(0);
2330
2331 SDNode *Instruction =
2332 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2333 SDValue SuperReg = SDValue(Instruction, 0);
2334
2335 for (unsigned I = 0; I < NumOutVecs; ++I)
2336 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2337 AArch64::zsub0 + I, DL, VT, SuperReg));
2338
2339 ReplaceUses(SDValue(Node, NumOutVecs), SDValue(Instruction, 1));
2340 CurDAG->RemoveDeadNode(Node);
2341}
2342
2343void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2344 unsigned Op) {
2345 SDLoc DL(N);
2346 EVT VT = N->getValueType(0);
2347
2348 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2349 SDValue Zd = createZMulTuple(Regs);
2350 SDValue Zn = N->getOperand(1 + NumVecs);
2351 SDValue Zm = N->getOperand(2 + NumVecs);
2352
2353 SDValue Ops[] = {Zd, Zn, Zm};
2354
2355 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2356 SDValue SuperReg = SDValue(Intrinsic, 0);
2357 for (unsigned i = 0; i < NumVecs; ++i)
2358 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2359 AArch64::zsub0 + i, DL, VT, SuperReg));
2360
2361 CurDAG->RemoveDeadNode(N);
2362}
2363
2364bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2365 switch (BaseReg) {
2366 default:
2367 return false;
2368 case AArch64::ZA:
2369 case AArch64::ZAB0:
2370 if (TileNum == 0)
2371 break;
2372 return false;
2373 case AArch64::ZAH0:
2374 if (TileNum <= 1)
2375 break;
2376 return false;
2377 case AArch64::ZAS0:
2378 if (TileNum <= 3)
2379 break;
2380 return false;
2381 case AArch64::ZAD0:
2382 if (TileNum <= 7)
2383 break;
2384 return false;
2385 }
2386
2387 BaseReg += TileNum;
2388 return true;
2389}
2390
2391template <unsigned MaxIdx, unsigned Scale>
2392void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2393 unsigned BaseReg, unsigned Op) {
2394 unsigned TileNum = 0;
2395 if (BaseReg != AArch64::ZA)
2396 TileNum = N->getConstantOperandVal(2);
2397
2398 if (!SelectSMETile(BaseReg, TileNum))
2399 return;
2400
2401 SDValue SliceBase, Base, Offset;
2402 if (BaseReg == AArch64::ZA)
2403 SliceBase = N->getOperand(2);
2404 else
2405 SliceBase = N->getOperand(3);
2406
2407 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2408 return;
2409
2410 SDLoc DL(N);
2411 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2412 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2413 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2414
2415 EVT VT = N->getValueType(0);
2416 for (unsigned I = 0; I < NumVecs; ++I)
2417 ReplaceUses(SDValue(N, I),
2418 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2419 SDValue(Mov, 0)));
2420 // Copy chain
2421 unsigned ChainIdx = NumVecs;
2422 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2423 CurDAG->RemoveDeadNode(N);
2424}
2425
2426void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2427 unsigned Op, unsigned MaxIdx,
2428 unsigned Scale, unsigned BaseReg) {
2429 // Slice can be in different positions
2430 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2431 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2432 SDValue SliceBase = N->getOperand(2);
2433 if (BaseReg != AArch64::ZA)
2434 SliceBase = N->getOperand(3);
2435
2437 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2438 return;
2439 // The correct Za tile number is computed in Machine Instruction
2440 // See EmitZAInstr
2441 // DAG cannot select Za tile as an output register with ZReg
2442 SDLoc DL(N);
2444 if (BaseReg != AArch64::ZA )
2445 Ops.push_back(N->getOperand(2));
2446 Ops.push_back(Base);
2447 Ops.push_back(Offset);
2448 Ops.push_back(N->getOperand(0)); //Chain
2449 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2450
2451 EVT VT = N->getValueType(0);
2452 for (unsigned I = 0; I < NumVecs; ++I)
2453 ReplaceUses(SDValue(N, I),
2454 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2455 SDValue(Mov, 0)));
2456
2457 // Copy chain
2458 unsigned ChainIdx = NumVecs;
2459 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2460 CurDAG->RemoveDeadNode(N);
2461}
2462
2463void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2464 unsigned NumOutVecs,
2465 bool IsTupleInput,
2466 unsigned Opc) {
2467 SDLoc DL(N);
2468 EVT VT = N->getValueType(0);
2469 unsigned NumInVecs = N->getNumOperands() - 1;
2470
2472 if (IsTupleInput) {
2473 assert((NumInVecs == 2 || NumInVecs == 4) &&
2474 "Don't know how to handle multi-register input!");
2475 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2476 Ops.push_back(createZMulTuple(Regs));
2477 } else {
2478 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2479 for (unsigned I = 0; I < NumInVecs; I++)
2480 Ops.push_back(N->getOperand(1 + I));
2481 }
2482
2483 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2484 SDValue SuperReg = SDValue(Res, 0);
2485
2486 for (unsigned I = 0; I < NumOutVecs; I++)
2487 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2488 AArch64::zsub0 + I, DL, VT, SuperReg));
2489 CurDAG->RemoveDeadNode(N);
2490}
2491
2492void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2493 unsigned Opc) {
2494 SDLoc dl(N);
2495 EVT VT = N->getOperand(2)->getValueType(0);
2496
2497 // Form a REG_SEQUENCE to force register allocation.
2498 bool Is128Bit = VT.getSizeInBits() == 128;
2499 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2500 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2501
2502 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2503 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2504
2505 // Transfer memoperands.
2506 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2507 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2508
2509 ReplaceNode(N, St);
2510}
2511
2512void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2513 unsigned Scale, unsigned Opc_rr,
2514 unsigned Opc_ri) {
2515 SDLoc dl(N);
2516
2517 // Form a REG_SEQUENCE to force register allocation.
2518 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2519 SDValue RegSeq = createZTuple(Regs);
2520
2521 // Optimize addressing mode.
2522 unsigned Opc;
2524 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2525 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2526 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2527
2528 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2529 Base, // address
2530 Offset, // offset
2531 N->getOperand(0)}; // chain
2532 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2533
2534 ReplaceNode(N, St);
2535}
2536
2537bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2538 SDValue &OffImm) {
2539 SDLoc dl(N);
2540 const DataLayout &DL = CurDAG->getDataLayout();
2541 const TargetLowering *TLI = getTargetLowering();
2542
2543 // Try to match it for the frame address
2544 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2545 int FI = FINode->getIndex();
2546 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2547 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2548 return true;
2549 }
2550
2551 return false;
2552}
2553
2554void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2555 unsigned Opc) {
2556 SDLoc dl(N);
2557 EVT VT = N->getOperand(2)->getValueType(0);
2558 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2559 MVT::Other}; // Type for the Chain
2560
2561 // Form a REG_SEQUENCE to force register allocation.
2562 bool Is128Bit = VT.getSizeInBits() == 128;
2563 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2564 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2565
2566 SDValue Ops[] = {RegSeq,
2567 N->getOperand(NumVecs + 1), // base register
2568 N->getOperand(NumVecs + 2), // Incremental
2569 N->getOperand(0)}; // Chain
2570 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2571
2572 // Transfer memoperands.
2573 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2574 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2575
2576 ReplaceNode(N, St);
2577}
2578
2579namespace {
2580/// WidenVector - Given a value in the V64 register class, produce the
2581/// equivalent value in the V128 register class.
2582class WidenVector {
2583 SelectionDAG &DAG;
2584
2585public:
2586 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2587
2588 SDValue operator()(SDValue V64Reg) {
2589 EVT VT = V64Reg.getValueType();
2590 unsigned NarrowSize = VT.getVectorNumElements();
2591 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2592 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2593 SDLoc DL(V64Reg);
2594
2595 SDValue Undef =
2596 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2597 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2598 }
2599};
2600} // namespace
2601
2602/// NarrowVector - Given a value in the V128 register class, produce the
2603/// equivalent value in the V64 register class.
2605 EVT VT = V128Reg.getValueType();
2606 unsigned WideSize = VT.getVectorNumElements();
2607 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2608 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2609
2610 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2611 V128Reg);
2612}
2613
2614void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2615 unsigned Opc) {
2616 SDLoc dl(N);
2617 EVT VT = N->getValueType(0);
2618 bool Narrow = VT.getSizeInBits() == 64;
2619
2620 // Form a REG_SEQUENCE to force register allocation.
2621 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2622
2623 if (Narrow)
2624 transform(Regs, Regs.begin(),
2625 WidenVector(*CurDAG));
2626
2627 SDValue RegSeq = createQTuple(Regs);
2628
2629 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2630
2631 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2632
2633 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2634 N->getOperand(NumVecs + 3), N->getOperand(0)};
2635 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2636 SDValue SuperReg = SDValue(Ld, 0);
2637
2638 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2639 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2640 AArch64::qsub2, AArch64::qsub3 };
2641 for (unsigned i = 0; i < NumVecs; ++i) {
2642 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2643 if (Narrow)
2644 NV = NarrowVector(NV, *CurDAG);
2645 ReplaceUses(SDValue(N, i), NV);
2646 }
2647
2648 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2649 CurDAG->RemoveDeadNode(N);
2650}
2651
2652void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2653 unsigned Opc) {
2654 SDLoc dl(N);
2655 EVT VT = N->getValueType(0);
2656 bool Narrow = VT.getSizeInBits() == 64;
2657
2658 // Form a REG_SEQUENCE to force register allocation.
2659 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2660
2661 if (Narrow)
2662 transform(Regs, Regs.begin(),
2663 WidenVector(*CurDAG));
2664
2665 SDValue RegSeq = createQTuple(Regs);
2666
2667 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2668 RegSeq->getValueType(0), MVT::Other};
2669
2670 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2671
2672 SDValue Ops[] = {RegSeq,
2673 CurDAG->getTargetConstant(LaneNo, dl,
2674 MVT::i64), // Lane Number
2675 N->getOperand(NumVecs + 2), // Base register
2676 N->getOperand(NumVecs + 3), // Incremental
2677 N->getOperand(0)};
2678 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2679
2680 // Update uses of the write back register
2681 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2682
2683 // Update uses of the vector list
2684 SDValue SuperReg = SDValue(Ld, 1);
2685 if (NumVecs == 1) {
2686 ReplaceUses(SDValue(N, 0),
2687 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2688 } else {
2689 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2690 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2691 AArch64::qsub2, AArch64::qsub3 };
2692 for (unsigned i = 0; i < NumVecs; ++i) {
2693 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2694 SuperReg);
2695 if (Narrow)
2696 NV = NarrowVector(NV, *CurDAG);
2697 ReplaceUses(SDValue(N, i), NV);
2698 }
2699 }
2700
2701 // Update the Chain
2702 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2703 CurDAG->RemoveDeadNode(N);
2704}
2705
2706void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2707 unsigned Opc) {
2708 SDLoc dl(N);
2709 EVT VT = N->getOperand(2)->getValueType(0);
2710 bool Narrow = VT.getSizeInBits() == 64;
2711
2712 // Form a REG_SEQUENCE to force register allocation.
2713 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2714
2715 if (Narrow)
2716 transform(Regs, Regs.begin(),
2717 WidenVector(*CurDAG));
2718
2719 SDValue RegSeq = createQTuple(Regs);
2720
2721 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2722
2723 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2724 N->getOperand(NumVecs + 3), N->getOperand(0)};
2725 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2726
2727 // Transfer memoperands.
2728 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2729 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2730
2731 ReplaceNode(N, St);
2732}
2733
2734void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2735 unsigned Opc) {
2736 SDLoc dl(N);
2737 EVT VT = N->getOperand(2)->getValueType(0);
2738 bool Narrow = VT.getSizeInBits() == 64;
2739
2740 // Form a REG_SEQUENCE to force register allocation.
2741 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2742
2743 if (Narrow)
2744 transform(Regs, Regs.begin(),
2745 WidenVector(*CurDAG));
2746
2747 SDValue RegSeq = createQTuple(Regs);
2748
2749 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2750 MVT::Other};
2751
2752 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2753
2754 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2755 N->getOperand(NumVecs + 2), // Base Register
2756 N->getOperand(NumVecs + 3), // Incremental
2757 N->getOperand(0)};
2758 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2759
2760 // Transfer memoperands.
2761 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2762 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2763
2764 ReplaceNode(N, St);
2765}
2766
2768 unsigned &Opc, SDValue &Opd0,
2769 unsigned &LSB, unsigned &MSB,
2770 unsigned NumberOfIgnoredLowBits,
2771 bool BiggerPattern) {
2772 assert(N->getOpcode() == ISD::AND &&
2773 "N must be a AND operation to call this function");
2774
2775 EVT VT = N->getValueType(0);
2776
2777 // Here we can test the type of VT and return false when the type does not
2778 // match, but since it is done prior to that call in the current context
2779 // we turned that into an assert to avoid redundant code.
2780 assert((VT == MVT::i32 || VT == MVT::i64) &&
2781 "Type checking must have been done before calling this function");
2782
2783 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2784 // changed the AND node to a 32-bit mask operation. We'll have to
2785 // undo that as part of the transform here if we want to catch all
2786 // the opportunities.
2787 // Currently the NumberOfIgnoredLowBits argument helps to recover
2788 // from these situations when matching bigger pattern (bitfield insert).
2789
2790 // For unsigned extracts, check for a shift right and mask
2791 uint64_t AndImm = 0;
2792 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2793 return false;
2794
2795 const SDNode *Op0 = N->getOperand(0).getNode();
2796
2797 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2798 // simplified. Try to undo that
2799 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2800
2801 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2802 if (AndImm & (AndImm + 1))
2803 return false;
2804
2805 bool ClampMSB = false;
2806 uint64_t SrlImm = 0;
2807 // Handle the SRL + ANY_EXTEND case.
2808 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2809 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2810 // Extend the incoming operand of the SRL to 64-bit.
2811 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2812 // Make sure to clamp the MSB so that we preserve the semantics of the
2813 // original operations.
2814 ClampMSB = true;
2815 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2817 SrlImm)) {
2818 // If the shift result was truncated, we can still combine them.
2819 Opd0 = Op0->getOperand(0).getOperand(0);
2820
2821 // Use the type of SRL node.
2822 VT = Opd0->getValueType(0);
2823 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2824 Opd0 = Op0->getOperand(0);
2825 ClampMSB = (VT == MVT::i32);
2826 } else if (BiggerPattern) {
2827 // Let's pretend a 0 shift right has been performed.
2828 // The resulting code will be at least as good as the original one
2829 // plus it may expose more opportunities for bitfield insert pattern.
2830 // FIXME: Currently we limit this to the bigger pattern, because
2831 // some optimizations expect AND and not UBFM.
2832 Opd0 = N->getOperand(0);
2833 } else
2834 return false;
2835
2836 // Bail out on large immediates. This happens when no proper
2837 // combining/constant folding was performed.
2838 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2839 LLVM_DEBUG(
2840 (dbgs() << N
2841 << ": Found large shift immediate, this should not happen\n"));
2842 return false;
2843 }
2844
2845 LSB = SrlImm;
2846 MSB = SrlImm +
2847 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2848 : llvm::countr_one<uint64_t>(AndImm)) -
2849 1;
2850 if (ClampMSB)
2851 // Since we're moving the extend before the right shift operation, we need
2852 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2853 // the zeros which would get shifted in with the original right shift
2854 // operation.
2855 MSB = MSB > 31 ? 31 : MSB;
2856
2857 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2858 return true;
2859}
2860
2862 SDValue &Opd0, unsigned &Immr,
2863 unsigned &Imms) {
2864 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2865
2866 EVT VT = N->getValueType(0);
2867 unsigned BitWidth = VT.getSizeInBits();
2868 assert((VT == MVT::i32 || VT == MVT::i64) &&
2869 "Type checking must have been done before calling this function");
2870
2871 SDValue Op = N->getOperand(0);
2872 if (Op->getOpcode() == ISD::TRUNCATE) {
2873 Op = Op->getOperand(0);
2874 VT = Op->getValueType(0);
2875 BitWidth = VT.getSizeInBits();
2876 }
2877
2878 uint64_t ShiftImm;
2879 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2880 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2881 return false;
2882
2883 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2884 if (ShiftImm + Width > BitWidth)
2885 return false;
2886
2887 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2888 Opd0 = Op.getOperand(0);
2889 Immr = ShiftImm;
2890 Imms = ShiftImm + Width - 1;
2891 return true;
2892}
2893
2895 SDValue &Opd0, unsigned &LSB,
2896 unsigned &MSB) {
2897 // We are looking for the following pattern which basically extracts several
2898 // continuous bits from the source value and places it from the LSB of the
2899 // destination value, all other bits of the destination value or set to zero:
2900 //
2901 // Value2 = AND Value, MaskImm
2902 // SRL Value2, ShiftImm
2903 //
2904 // with MaskImm >> ShiftImm to search for the bit width.
2905 //
2906 // This gets selected into a single UBFM:
2907 //
2908 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2909 //
2910
2911 if (N->getOpcode() != ISD::SRL)
2912 return false;
2913
2914 uint64_t AndMask = 0;
2915 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2916 return false;
2917
2918 Opd0 = N->getOperand(0).getOperand(0);
2919
2920 uint64_t SrlImm = 0;
2921 if (!isIntImmediate(N->getOperand(1), SrlImm))
2922 return false;
2923
2924 // Check whether we really have several bits extract here.
2925 if (!isMask_64(AndMask >> SrlImm))
2926 return false;
2927
2928 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2929 LSB = SrlImm;
2930 MSB = llvm::Log2_64(AndMask);
2931 return true;
2932}
2933
2934static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2935 unsigned &Immr, unsigned &Imms,
2936 bool BiggerPattern) {
2937 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2938 "N must be a SHR/SRA operation to call this function");
2939
2940 EVT VT = N->getValueType(0);
2941
2942 // Here we can test the type of VT and return false when the type does not
2943 // match, but since it is done prior to that call in the current context
2944 // we turned that into an assert to avoid redundant code.
2945 assert((VT == MVT::i32 || VT == MVT::i64) &&
2946 "Type checking must have been done before calling this function");
2947
2948 // Check for AND + SRL doing several bits extract.
2949 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2950 return true;
2951
2952 // We're looking for a shift of a shift.
2953 uint64_t ShlImm = 0;
2954 uint64_t TruncBits = 0;
2955 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2956 Opd0 = N->getOperand(0).getOperand(0);
2957 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2958 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2959 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2960 // be considered as setting high 32 bits as zero. Our strategy here is to
2961 // always generate 64bit UBFM. This consistency will help the CSE pass
2962 // later find more redundancy.
2963 Opd0 = N->getOperand(0).getOperand(0);
2964 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2965 VT = Opd0.getValueType();
2966 assert(VT == MVT::i64 && "the promoted type should be i64");
2967 } else if (BiggerPattern) {
2968 // Let's pretend a 0 shift left has been performed.
2969 // FIXME: Currently we limit this to the bigger pattern case,
2970 // because some optimizations expect AND and not UBFM
2971 Opd0 = N->getOperand(0);
2972 } else
2973 return false;
2974
2975 // Missing combines/constant folding may have left us with strange
2976 // constants.
2977 if (ShlImm >= VT.getSizeInBits()) {
2978 LLVM_DEBUG(
2979 (dbgs() << N
2980 << ": Found large shift immediate, this should not happen\n"));
2981 return false;
2982 }
2983
2984 uint64_t SrlImm = 0;
2985 if (!isIntImmediate(N->getOperand(1), SrlImm))
2986 return false;
2987
2988 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2989 "bad amount in shift node!");
2990 int immr = SrlImm - ShlImm;
2991 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2992 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2993 // SRA requires a signed extraction
2994 if (VT == MVT::i32)
2995 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2996 else
2997 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2998 return true;
2999}
3000
3001bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
3002 assert(N->getOpcode() == ISD::SIGN_EXTEND);
3003
3004 EVT VT = N->getValueType(0);
3005 EVT NarrowVT = N->getOperand(0)->getValueType(0);
3006 if (VT != MVT::i64 || NarrowVT != MVT::i32)
3007 return false;
3008
3009 uint64_t ShiftImm;
3010 SDValue Op = N->getOperand(0);
3011 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
3012 return false;
3013
3014 SDLoc dl(N);
3015 // Extend the incoming operand of the shift to 64-bits.
3016 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
3017 unsigned Immr = ShiftImm;
3018 unsigned Imms = NarrowVT.getSizeInBits() - 1;
3019 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3020 CurDAG->getTargetConstant(Imms, dl, VT)};
3021 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
3022 return true;
3023}
3024
3025static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
3026 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
3027 unsigned NumberOfIgnoredLowBits = 0,
3028 bool BiggerPattern = false) {
3029 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
3030 return false;
3031
3032 switch (N->getOpcode()) {
3033 default:
3034 if (!N->isMachineOpcode())
3035 return false;
3036 break;
3037 case ISD::AND:
3038 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
3039 NumberOfIgnoredLowBits, BiggerPattern);
3040 case ISD::SRL:
3041 case ISD::SRA:
3042 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
3043
3045 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
3046 }
3047
3048 unsigned NOpc = N->getMachineOpcode();
3049 switch (NOpc) {
3050 default:
3051 return false;
3052 case AArch64::SBFMWri:
3053 case AArch64::UBFMWri:
3054 case AArch64::SBFMXri:
3055 case AArch64::UBFMXri:
3056 Opc = NOpc;
3057 Opd0 = N->getOperand(0);
3058 Immr = N->getConstantOperandVal(1);
3059 Imms = N->getConstantOperandVal(2);
3060 return true;
3061 }
3062 // Unreachable
3063 return false;
3064}
3065
3066bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3067 unsigned Opc, Immr, Imms;
3068 SDValue Opd0;
3069 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3070 return false;
3071
3072 EVT VT = N->getValueType(0);
3073 SDLoc dl(N);
3074
3075 // If the bit extract operation is 64bit but the original type is 32bit, we
3076 // need to add one EXTRACT_SUBREG.
3077 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3078 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3079 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3080
3081 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3082 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3083 MVT::i32, SDValue(BFM, 0));
3084 ReplaceNode(N, Inner.getNode());
3085 return true;
3086 }
3087
3088 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3089 CurDAG->getTargetConstant(Imms, dl, VT)};
3090 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3091 return true;
3092}
3093
3094/// Does DstMask form a complementary pair with the mask provided by
3095/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3096/// this asks whether DstMask zeroes precisely those bits that will be set by
3097/// the other half.
3098static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3099 unsigned NumberOfIgnoredHighBits, EVT VT) {
3100 assert((VT == MVT::i32 || VT == MVT::i64) &&
3101 "i32 or i64 mask type expected!");
3102 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3103
3104 // Enable implicitTrunc as we're intentionally ignoring high bits.
3105 APInt SignificantDstMask =
3106 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3107 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3108
3109 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3110 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3111}
3112
3113// Look for bits that will be useful for later uses.
3114// A bit is consider useless as soon as it is dropped and never used
3115// before it as been dropped.
3116// E.g., looking for useful bit of x
3117// 1. y = x & 0x7
3118// 2. z = y >> 2
3119// After #1, x useful bits are 0x7, then the useful bits of x, live through
3120// y.
3121// After #2, the useful bits of x are 0x4.
3122// However, if x is used on an unpredictable instruction, then all its bits
3123// are useful.
3124// E.g.
3125// 1. y = x & 0x7
3126// 2. z = y >> 2
3127// 3. str x, [@x]
3128static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3129
3131 unsigned Depth) {
3132 uint64_t Imm =
3133 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3134 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3135 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3136 getUsefulBits(Op, UsefulBits, Depth + 1);
3137}
3138
3140 uint64_t Imm, uint64_t MSB,
3141 unsigned Depth) {
3142 // inherit the bitwidth value
3143 APInt OpUsefulBits(UsefulBits);
3144 OpUsefulBits = 1;
3145
3146 if (MSB >= Imm) {
3147 OpUsefulBits <<= MSB - Imm + 1;
3148 --OpUsefulBits;
3149 // The interesting part will be in the lower part of the result
3150 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3151 // The interesting part was starting at Imm in the argument
3152 OpUsefulBits <<= Imm;
3153 } else {
3154 OpUsefulBits <<= MSB + 1;
3155 --OpUsefulBits;
3156 // The interesting part will be shifted in the result
3157 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3158 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3159 // The interesting part was at zero in the argument
3160 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3161 }
3162
3163 UsefulBits &= OpUsefulBits;
3164}
3165
3166static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3167 unsigned Depth) {
3168 uint64_t Imm =
3169 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3170 uint64_t MSB =
3171 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3172
3173 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3174}
3175
3177 unsigned Depth) {
3178 uint64_t ShiftTypeAndValue =
3179 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3180 APInt Mask(UsefulBits);
3181 Mask.clearAllBits();
3182 Mask.flipAllBits();
3183
3184 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3185 // Shift Left
3186 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3187 Mask <<= ShiftAmt;
3188 getUsefulBits(Op, Mask, Depth + 1);
3189 Mask.lshrInPlace(ShiftAmt);
3190 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3191 // Shift Right
3192 // We do not handle AArch64_AM::ASR, because the sign will change the
3193 // number of useful bits
3194 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3195 Mask.lshrInPlace(ShiftAmt);
3196 getUsefulBits(Op, Mask, Depth + 1);
3197 Mask <<= ShiftAmt;
3198 } else
3199 return;
3200
3201 UsefulBits &= Mask;
3202}
3203
3204static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3205 unsigned Depth) {
3206 uint64_t Imm =
3207 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3208 uint64_t MSB =
3209 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3210
3211 APInt OpUsefulBits(UsefulBits);
3212 OpUsefulBits = 1;
3213
3214 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3215 ResultUsefulBits.flipAllBits();
3216 APInt Mask(UsefulBits.getBitWidth(), 0);
3217
3218 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3219
3220 if (MSB >= Imm) {
3221 // The instruction is a BFXIL.
3222 uint64_t Width = MSB - Imm + 1;
3223 uint64_t LSB = Imm;
3224
3225 OpUsefulBits <<= Width;
3226 --OpUsefulBits;
3227
3228 if (Op.getOperand(1) == Orig) {
3229 // Copy the low bits from the result to bits starting from LSB.
3230 Mask = ResultUsefulBits & OpUsefulBits;
3231 Mask <<= LSB;
3232 }
3233
3234 if (Op.getOperand(0) == Orig)
3235 // Bits starting from LSB in the input contribute to the result.
3236 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3237 } else {
3238 // The instruction is a BFI.
3239 uint64_t Width = MSB + 1;
3240 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3241
3242 OpUsefulBits <<= Width;
3243 --OpUsefulBits;
3244 OpUsefulBits <<= LSB;
3245
3246 if (Op.getOperand(1) == Orig) {
3247 // Copy the bits from the result to the zero bits.
3248 Mask = ResultUsefulBits & OpUsefulBits;
3249 Mask.lshrInPlace(LSB);
3250 }
3251
3252 if (Op.getOperand(0) == Orig)
3253 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3254 }
3255
3256 UsefulBits &= Mask;
3257}
3258
3259static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3260 SDValue Orig, unsigned Depth) {
3261
3262 // Users of this node should have already been instruction selected
3263 // FIXME: Can we turn that into an assert?
3264 if (!UserNode->isMachineOpcode())
3265 return;
3266
3267 switch (UserNode->getMachineOpcode()) {
3268 default:
3269 return;
3270 case AArch64::ANDSWri:
3271 case AArch64::ANDSXri:
3272 case AArch64::ANDWri:
3273 case AArch64::ANDXri:
3274 // We increment Depth only when we call the getUsefulBits
3275 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3276 Depth);
3277 case AArch64::UBFMWri:
3278 case AArch64::UBFMXri:
3279 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3280
3281 case AArch64::ORRWrs:
3282 case AArch64::ORRXrs:
3283 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3284 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3285 Depth);
3286 return;
3287 case AArch64::BFMWri:
3288 case AArch64::BFMXri:
3289 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3290
3291 case AArch64::STRBBui:
3292 case AArch64::STURBBi:
3293 if (UserNode->getOperand(0) != Orig)
3294 return;
3295 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3296 return;
3297
3298 case AArch64::STRHHui:
3299 case AArch64::STURHHi:
3300 if (UserNode->getOperand(0) != Orig)
3301 return;
3302 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3303 return;
3304 }
3305}
3306
3307static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3309 return;
3310 // Initialize UsefulBits
3311 if (!Depth) {
3312 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3313 // At the beginning, assume every produced bits is useful
3314 UsefulBits = APInt(Bitwidth, 0);
3315 UsefulBits.flipAllBits();
3316 }
3317 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3318
3319 for (SDNode *Node : Op.getNode()->users()) {
3320 // A use cannot produce useful bits
3321 APInt UsefulBitsForUse = APInt(UsefulBits);
3322 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3323 UsersUsefulBits |= UsefulBitsForUse;
3324 }
3325 // UsefulBits contains the produced bits that are meaningful for the
3326 // current definition, thus a user cannot make a bit meaningful at
3327 // this point
3328 UsefulBits &= UsersUsefulBits;
3329}
3330
3331/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3332/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3333/// 0, return Op unchanged.
3334static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3335 if (ShlAmount == 0)
3336 return Op;
3337
3338 EVT VT = Op.getValueType();
3339 SDLoc dl(Op);
3340 unsigned BitWidth = VT.getSizeInBits();
3341 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3342
3343 SDNode *ShiftNode;
3344 if (ShlAmount > 0) {
3345 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3346 ShiftNode = CurDAG->getMachineNode(
3347 UBFMOpc, dl, VT, Op,
3348 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3349 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3350 } else {
3351 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3352 assert(ShlAmount < 0 && "expected right shift");
3353 int ShrAmount = -ShlAmount;
3354 ShiftNode = CurDAG->getMachineNode(
3355 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3356 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3357 }
3358
3359 return SDValue(ShiftNode, 0);
3360}
3361
3362// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3363static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3364 bool BiggerPattern,
3365 const uint64_t NonZeroBits,
3366 SDValue &Src, int &DstLSB,
3367 int &Width);
3368
3369// For bit-field-positioning pattern "shl VAL, N)".
3370static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3371 bool BiggerPattern,
3372 const uint64_t NonZeroBits,
3373 SDValue &Src, int &DstLSB,
3374 int &Width);
3375
3376/// Does this tree qualify as an attempt to move a bitfield into position,
3377/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3379 bool BiggerPattern, SDValue &Src,
3380 int &DstLSB, int &Width) {
3381 EVT VT = Op.getValueType();
3382 unsigned BitWidth = VT.getSizeInBits();
3383 (void)BitWidth;
3384 assert(BitWidth == 32 || BitWidth == 64);
3385
3386 KnownBits Known = CurDAG->computeKnownBits(Op);
3387
3388 // Non-zero in the sense that they're not provably zero, which is the key
3389 // point if we want to use this value
3390 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3391 if (!isShiftedMask_64(NonZeroBits))
3392 return false;
3393
3394 switch (Op.getOpcode()) {
3395 default:
3396 break;
3397 case ISD::AND:
3398 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3399 NonZeroBits, Src, DstLSB, Width);
3400 case ISD::SHL:
3401 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3402 NonZeroBits, Src, DstLSB, Width);
3403 }
3404
3405 return false;
3406}
3407
3409 bool BiggerPattern,
3410 const uint64_t NonZeroBits,
3411 SDValue &Src, int &DstLSB,
3412 int &Width) {
3413 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3414
3415 EVT VT = Op.getValueType();
3416 assert((VT == MVT::i32 || VT == MVT::i64) &&
3417 "Caller guarantees VT is one of i32 or i64");
3418 (void)VT;
3419
3420 uint64_t AndImm;
3421 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3422 return false;
3423
3424 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3425 // 1) (AndImm & (1 << POS) == 0)
3426 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3427 //
3428 // 1) and 2) don't agree so something must be wrong (e.g., in
3429 // 'SelectionDAG::computeKnownBits')
3430 assert((~AndImm & NonZeroBits) == 0 &&
3431 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3432
3433 SDValue AndOp0 = Op.getOperand(0);
3434
3435 uint64_t ShlImm;
3436 SDValue ShlOp0;
3437 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3438 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3439 ShlOp0 = AndOp0.getOperand(0);
3440 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3442 ShlImm)) {
3443 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3444
3445 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3446 SDValue ShlVal = AndOp0.getOperand(0);
3447
3448 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3449 // expect VT to be MVT::i32.
3450 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3451
3452 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3453 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3454 } else
3455 return false;
3456
3457 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3458 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3459 // AndOp0+AND.
3460 if (!BiggerPattern && !AndOp0.hasOneUse())
3461 return false;
3462
3463 DstLSB = llvm::countr_zero(NonZeroBits);
3464 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3465
3466 // Bail out on large Width. This happens when no proper combining / constant
3467 // folding was performed.
3468 if (Width >= (int)VT.getSizeInBits()) {
3469 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3470 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3471 // "val".
3472 // If VT is i32, what Width >= 32 means:
3473 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3474 // demands at least 'Width' bits (after dag-combiner). This together with
3475 // `any_extend` Op (undefined higher bits) indicates missed combination
3476 // when lowering the 'and' IR instruction to an machine IR instruction.
3477 LLVM_DEBUG(
3478 dbgs()
3479 << "Found large Width in bit-field-positioning -- this indicates no "
3480 "proper combining / constant folding was performed\n");
3481 return false;
3482 }
3483
3484 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3485 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3486 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3487 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3488 // which case it is not profitable to insert an extra shift.
3489 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3490 return false;
3491
3492 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3493 return true;
3494}
3495
3496// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3497// UBFIZ.
3499 SDValue &Src, int &DstLSB,
3500 int &Width) {
3501 // Caller should have verified that N is a left shift with constant shift
3502 // amount; asserts that.
3503 assert(Op.getOpcode() == ISD::SHL &&
3504 "Op.getNode() should be a SHL node to call this function");
3505 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3506 "Op.getNode() should shift ShlImm to call this function");
3507
3508 uint64_t AndImm = 0;
3509 SDValue Op0 = Op.getOperand(0);
3510 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3511 return false;
3512
3513 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3514 if (isMask_64(ShiftedAndImm)) {
3515 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3516 // should end with Mask, and could be prefixed with random bits if those
3517 // bits are shifted out.
3518 //
3519 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3520 // the AND result corresponding to those bits are shifted out, so it's fine
3521 // to not extract them.
3522 Width = llvm::countr_one(ShiftedAndImm);
3523 DstLSB = ShlImm;
3524 Src = Op0.getOperand(0);
3525 return true;
3526 }
3527 return false;
3528}
3529
3531 bool BiggerPattern,
3532 const uint64_t NonZeroBits,
3533 SDValue &Src, int &DstLSB,
3534 int &Width) {
3535 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3536
3537 EVT VT = Op.getValueType();
3538 assert((VT == MVT::i32 || VT == MVT::i64) &&
3539 "Caller guarantees that type is i32 or i64");
3540 (void)VT;
3541
3542 uint64_t ShlImm;
3543 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3544 return false;
3545
3546 if (!BiggerPattern && !Op.hasOneUse())
3547 return false;
3548
3549 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3550 return true;
3551
3552 DstLSB = llvm::countr_zero(NonZeroBits);
3553 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3554
3555 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3556 return false;
3557
3558 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3559 return true;
3560}
3561
3562static bool isShiftedMask(uint64_t Mask, EVT VT) {
3563 assert(VT == MVT::i32 || VT == MVT::i64);
3564 if (VT == MVT::i32)
3565 return isShiftedMask_32(Mask);
3566 return isShiftedMask_64(Mask);
3567}
3568
3569// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3570// inserted only sets known zero bits.
3572 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3573
3574 EVT VT = N->getValueType(0);
3575 if (VT != MVT::i32 && VT != MVT::i64)
3576 return false;
3577
3578 unsigned BitWidth = VT.getSizeInBits();
3579
3580 uint64_t OrImm;
3581 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3582 return false;
3583
3584 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3585 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3586 // performance neutral.
3588 return false;
3589
3590 uint64_t MaskImm;
3591 SDValue And = N->getOperand(0);
3592 // Must be a single use AND with an immediate operand.
3593 if (!And.hasOneUse() ||
3594 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3595 return false;
3596
3597 // Compute the Known Zero for the AND as this allows us to catch more general
3598 // cases than just looking for AND with imm.
3599 KnownBits Known = CurDAG->computeKnownBits(And);
3600
3601 // Non-zero in the sense that they're not provably zero, which is the key
3602 // point if we want to use this value.
3603 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3604
3605 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3606 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3607 return false;
3608
3609 // The bits being inserted must only set those bits that are known to be zero.
3610 if ((OrImm & NotKnownZero) != 0) {
3611 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3612 // currently handle this case.
3613 return false;
3614 }
3615
3616 // BFI/BFXIL dst, src, #lsb, #width.
3617 int LSB = llvm::countr_one(NotKnownZero);
3618 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3619
3620 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3621 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3622 unsigned ImmS = Width - 1;
3623
3624 // If we're creating a BFI instruction avoid cases where we need more
3625 // instructions to materialize the BFI constant as compared to the original
3626 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3627 // should be no worse in this case.
3628 bool IsBFI = LSB != 0;
3629 uint64_t BFIImm = OrImm >> LSB;
3630 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3631 // We have a BFI instruction and we know the constant can't be materialized
3632 // with a ORR-immediate with the zero register.
3633 unsigned OrChunks = 0, BFIChunks = 0;
3634 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3635 if (((OrImm >> Shift) & 0xFFFF) != 0)
3636 ++OrChunks;
3637 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3638 ++BFIChunks;
3639 }
3640 if (BFIChunks > OrChunks)
3641 return false;
3642 }
3643
3644 // Materialize the constant to be inserted.
3645 SDLoc DL(N);
3646 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3647 SDNode *MOVI = CurDAG->getMachineNode(
3648 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3649
3650 // Create the BFI/BFXIL instruction.
3651 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3652 CurDAG->getTargetConstant(ImmR, DL, VT),
3653 CurDAG->getTargetConstant(ImmS, DL, VT)};
3654 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3655 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3656 return true;
3657}
3658
3660 SDValue &ShiftedOperand,
3661 uint64_t &EncodedShiftImm) {
3662 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3663 if (!Dst.hasOneUse())
3664 return false;
3665
3666 EVT VT = Dst.getValueType();
3667 assert((VT == MVT::i32 || VT == MVT::i64) &&
3668 "Caller should guarantee that VT is one of i32 or i64");
3669 const unsigned SizeInBits = VT.getSizeInBits();
3670
3671 SDLoc DL(Dst.getNode());
3672 uint64_t AndImm, ShlImm;
3673 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3674 isShiftedMask_64(AndImm)) {
3675 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3676 SDValue DstOp0 = Dst.getOperand(0);
3677 if (!DstOp0.hasOneUse())
3678 return false;
3679
3680 // An example to illustrate the transformation
3681 // From:
3682 // lsr x8, x1, #1
3683 // and x8, x8, #0x3f80
3684 // bfxil x8, x1, #0, #7
3685 // To:
3686 // and x8, x23, #0x7f
3687 // ubfx x9, x23, #8, #7
3688 // orr x23, x8, x9, lsl #7
3689 //
3690 // The number of instructions remains the same, but ORR is faster than BFXIL
3691 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3692 // the dependency chain is improved after the transformation.
3693 uint64_t SrlImm;
3694 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3695 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3696 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3697 unsigned MaskWidth =
3698 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3699 unsigned UBFMOpc =
3700 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3701 SDNode *UBFMNode = CurDAG->getMachineNode(
3702 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3703 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3704 VT),
3705 CurDAG->getTargetConstant(
3706 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3707 ShiftedOperand = SDValue(UBFMNode, 0);
3708 EncodedShiftImm = AArch64_AM::getShifterImm(
3709 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3710 return true;
3711 }
3712 }
3713 return false;
3714 }
3715
3716 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3717 ShiftedOperand = Dst.getOperand(0);
3718 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3719 return true;
3720 }
3721
3722 uint64_t SrlImm;
3723 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3724 ShiftedOperand = Dst.getOperand(0);
3725 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3726 return true;
3727 }
3728 return false;
3729}
3730
3731// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3732// the operands and select it to AArch64::ORR with shifted registers if
3733// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3734static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3735 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3736 const bool BiggerPattern) {
3737 EVT VT = N->getValueType(0);
3738 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3739 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3740 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3741 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3742 assert((VT == MVT::i32 || VT == MVT::i64) &&
3743 "Expect result type to be i32 or i64 since N is combinable to BFM");
3744 SDLoc DL(N);
3745
3746 // Bail out if BFM simplifies away one node in BFM Dst.
3747 if (OrOpd1 != Dst)
3748 return false;
3749
3750 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3751 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3752 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3753 if (BiggerPattern) {
3754 uint64_t SrcAndImm;
3755 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3756 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3757 // OrOpd0 = AND Src, #Mask
3758 // So BFM simplifies away one AND node from Src and doesn't simplify away
3759 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3760 // one node (from Rd), ORR is better since it has higher throughput and
3761 // smaller latency than BFM on many AArch64 processors (and for the rest
3762 // ORR is at least as good as BFM).
3763 SDValue ShiftedOperand;
3764 uint64_t EncodedShiftImm;
3765 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3766 EncodedShiftImm)) {
3767 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3768 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3769 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3770 return true;
3771 }
3772 }
3773 return false;
3774 }
3775
3776 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3777
3778 uint64_t ShlImm;
3779 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3780 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3781 SDValue Ops[] = {
3782 Dst, Src,
3783 CurDAG->getTargetConstant(
3785 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3786 return true;
3787 }
3788
3789 // Select the following pattern to left-shifted operand rather than BFI.
3790 // %val1 = op ..
3791 // %val2 = shl %val1, #imm
3792 // %res = or %val1, %val2
3793 //
3794 // If N is selected to be BFI, we know that
3795 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3796 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3797 //
3798 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3799 if (OrOpd0.getOperand(0) == OrOpd1) {
3800 SDValue Ops[] = {
3801 OrOpd1, OrOpd1,
3802 CurDAG->getTargetConstant(
3804 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3805 return true;
3806 }
3807 }
3808
3809 uint64_t SrlImm;
3810 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3811 // Select the following pattern to right-shifted operand rather than BFXIL.
3812 // %val1 = op ..
3813 // %val2 = lshr %val1, #imm
3814 // %res = or %val1, %val2
3815 //
3816 // If N is selected to be BFXIL, we know that
3817 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3818 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3819 //
3820 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3821 if (OrOpd0.getOperand(0) == OrOpd1) {
3822 SDValue Ops[] = {
3823 OrOpd1, OrOpd1,
3824 CurDAG->getTargetConstant(
3826 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3827 return true;
3828 }
3829 }
3830
3831 return false;
3832}
3833
3834static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3835 SelectionDAG *CurDAG) {
3836 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3837
3838 EVT VT = N->getValueType(0);
3839 if (VT != MVT::i32 && VT != MVT::i64)
3840 return false;
3841
3842 unsigned BitWidth = VT.getSizeInBits();
3843
3844 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3845 // have the expected shape. Try to undo that.
3846
3847 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3848 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3849
3850 // Given a OR operation, check if we have the following pattern
3851 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3852 // isBitfieldExtractOp)
3853 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3854 // countTrailingZeros(mask2) == imm2 - imm + 1
3855 // f = d | c
3856 // if yes, replace the OR instruction with:
3857 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3858
3859 // OR is commutative, check all combinations of operand order and values of
3860 // BiggerPattern, i.e.
3861 // Opd0, Opd1, BiggerPattern=false
3862 // Opd1, Opd0, BiggerPattern=false
3863 // Opd0, Opd1, BiggerPattern=true
3864 // Opd1, Opd0, BiggerPattern=true
3865 // Several of these combinations may match, so check with BiggerPattern=false
3866 // first since that will produce better results by matching more instructions
3867 // and/or inserting fewer extra instructions.
3868 for (int I = 0; I < 4; ++I) {
3869
3870 SDValue Dst, Src;
3871 unsigned ImmR, ImmS;
3872 bool BiggerPattern = I / 2;
3873 SDValue OrOpd0Val = N->getOperand(I % 2);
3874 SDNode *OrOpd0 = OrOpd0Val.getNode();
3875 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3876 SDNode *OrOpd1 = OrOpd1Val.getNode();
3877
3878 unsigned BFXOpc;
3879 int DstLSB, Width;
3880 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3881 NumberOfIgnoredLowBits, BiggerPattern)) {
3882 // Check that the returned opcode is compatible with the pattern,
3883 // i.e., same type and zero extended (U and not S)
3884 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3885 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3886 continue;
3887
3888 // Compute the width of the bitfield insertion
3889 DstLSB = 0;
3890 Width = ImmS - ImmR + 1;
3891 // FIXME: This constraint is to catch bitfield insertion we may
3892 // want to widen the pattern if we want to grab general bitfield
3893 // move case
3894 if (Width <= 0)
3895 continue;
3896
3897 // If the mask on the insertee is correct, we have a BFXIL operation. We
3898 // can share the ImmR and ImmS values from the already-computed UBFM.
3899 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3900 BiggerPattern,
3901 Src, DstLSB, Width)) {
3902 ImmR = (BitWidth - DstLSB) % BitWidth;
3903 ImmS = Width - 1;
3904 } else
3905 continue;
3906
3907 // Check the second part of the pattern
3908 EVT VT = OrOpd1Val.getValueType();
3909 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3910
3911 // Compute the Known Zero for the candidate of the first operand.
3912 // This allows to catch more general case than just looking for
3913 // AND with imm. Indeed, simplify-demanded-bits may have removed
3914 // the AND instruction because it proves it was useless.
3915 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3916
3917 // Check if there is enough room for the second operand to appear
3918 // in the first one
3919 APInt BitsToBeInserted =
3920 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3921
3922 if ((BitsToBeInserted & ~Known.Zero) != 0)
3923 continue;
3924
3925 // Set the first operand
3926 uint64_t Imm;
3927 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3928 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3929 // In that case, we can eliminate the AND
3930 Dst = OrOpd1->getOperand(0);
3931 else
3932 // Maybe the AND has been removed by simplify-demanded-bits
3933 // or is useful because it discards more bits
3934 Dst = OrOpd1Val;
3935
3936 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3937 // with shifted operand is more efficient.
3938 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3939 BiggerPattern))
3940 return true;
3941
3942 // both parts match
3943 SDLoc DL(N);
3944 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3945 CurDAG->getTargetConstant(ImmS, DL, VT)};
3946 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3947 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3948 return true;
3949 }
3950
3951 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3952 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3953 // mask (e.g., 0x000ffff0).
3954 uint64_t Mask0Imm, Mask1Imm;
3955 SDValue And0 = N->getOperand(0);
3956 SDValue And1 = N->getOperand(1);
3957 if (And0.hasOneUse() && And1.hasOneUse() &&
3958 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3959 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3960 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3961 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3962
3963 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3964 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3965 // bits to be inserted.
3966 if (isShiftedMask(Mask0Imm, VT)) {
3967 std::swap(And0, And1);
3968 std::swap(Mask0Imm, Mask1Imm);
3969 }
3970
3971 SDValue Src = And1->getOperand(0);
3972 SDValue Dst = And0->getOperand(0);
3973 unsigned LSB = llvm::countr_zero(Mask1Imm);
3974 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3975
3976 // The BFXIL inserts the low-order bits from a source register, so right
3977 // shift the needed bits into place.
3978 SDLoc DL(N);
3979 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3980 uint64_t LsrImm = LSB;
3981 if (Src->hasOneUse() &&
3982 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3983 (LsrImm + LSB) < BitWidth) {
3984 Src = Src->getOperand(0);
3985 LsrImm += LSB;
3986 }
3987
3988 SDNode *LSR = CurDAG->getMachineNode(
3989 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3990 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3991
3992 // BFXIL is an alias of BFM, so translate to BFM operands.
3993 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3994 unsigned ImmS = Width - 1;
3995
3996 // Create the BFXIL instruction.
3997 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3998 CurDAG->getTargetConstant(ImmR, DL, VT),
3999 CurDAG->getTargetConstant(ImmS, DL, VT)};
4000 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
4001 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4002 return true;
4003 }
4004
4005 return false;
4006}
4007
4008bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
4009 if (N->getOpcode() != ISD::OR)
4010 return false;
4011
4012 APInt NUsefulBits;
4013 getUsefulBits(SDValue(N, 0), NUsefulBits);
4014
4015 // If all bits are not useful, just return UNDEF.
4016 if (!NUsefulBits) {
4017 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
4018 return true;
4019 }
4020
4021 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
4022 return true;
4023
4024 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
4025}
4026
4027/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
4028/// equivalent of a left shift by a constant amount followed by an and masking
4029/// out a contiguous set of bits.
4030bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
4031 if (N->getOpcode() != ISD::AND)
4032 return false;
4033
4034 EVT VT = N->getValueType(0);
4035 if (VT != MVT::i32 && VT != MVT::i64)
4036 return false;
4037
4038 SDValue Op0;
4039 int DstLSB, Width;
4040 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
4041 Op0, DstLSB, Width))
4042 return false;
4043
4044 // ImmR is the rotate right amount.
4045 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
4046 // ImmS is the most significant bit of the source to be moved.
4047 unsigned ImmS = Width - 1;
4048
4049 SDLoc DL(N);
4050 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
4051 CurDAG->getTargetConstant(ImmS, DL, VT)};
4052 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
4053 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4054 return true;
4055}
4056
4057/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
4058/// variable shift/rotate instructions.
4059bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4060 EVT VT = N->getValueType(0);
4061
4062 unsigned Opc;
4063 switch (N->getOpcode()) {
4064 case ISD::ROTR:
4065 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
4066 break;
4067 case ISD::SHL:
4068 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
4069 break;
4070 case ISD::SRL:
4071 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
4072 break;
4073 case ISD::SRA:
4074 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4075 break;
4076 default:
4077 return false;
4078 }
4079
4080 uint64_t Size;
4081 uint64_t Bits;
4082 if (VT == MVT::i32) {
4083 Bits = 5;
4084 Size = 32;
4085 } else if (VT == MVT::i64) {
4086 Bits = 6;
4087 Size = 64;
4088 } else
4089 return false;
4090
4091 SDValue ShiftAmt = N->getOperand(1);
4092 SDLoc DL(N);
4093 SDValue NewShiftAmt;
4094
4095 // Skip over an extend of the shift amount.
4096 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4097 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4098 ShiftAmt = ShiftAmt->getOperand(0);
4099
4100 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4101 SDValue Add0 = ShiftAmt->getOperand(0);
4102 SDValue Add1 = ShiftAmt->getOperand(1);
4103 uint64_t Add0Imm;
4104 uint64_t Add1Imm;
4105 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4106 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4107 // to avoid the ADD/SUB.
4108 NewShiftAmt = Add0;
4109 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4110 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4111 (Add0Imm % Size == 0)) {
4112 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4113 // to generate a NEG instead of a SUB from a constant.
4114 unsigned NegOpc;
4115 unsigned ZeroReg;
4116 EVT SubVT = ShiftAmt->getValueType(0);
4117 if (SubVT == MVT::i32) {
4118 NegOpc = AArch64::SUBWrr;
4119 ZeroReg = AArch64::WZR;
4120 } else {
4121 assert(SubVT == MVT::i64);
4122 NegOpc = AArch64::SUBXrr;
4123 ZeroReg = AArch64::XZR;
4124 }
4125 SDValue Zero =
4126 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4127 MachineSDNode *Neg =
4128 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4129 NewShiftAmt = SDValue(Neg, 0);
4130 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4131 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4132 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4133 // to generate a NOT instead of a SUB from a constant.
4134 unsigned NotOpc;
4135 unsigned ZeroReg;
4136 EVT SubVT = ShiftAmt->getValueType(0);
4137 if (SubVT == MVT::i32) {
4138 NotOpc = AArch64::ORNWrr;
4139 ZeroReg = AArch64::WZR;
4140 } else {
4141 assert(SubVT == MVT::i64);
4142 NotOpc = AArch64::ORNXrr;
4143 ZeroReg = AArch64::XZR;
4144 }
4145 SDValue Zero =
4146 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4147 MachineSDNode *Not =
4148 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4149 NewShiftAmt = SDValue(Not, 0);
4150 } else
4151 return false;
4152 } else {
4153 // If the shift amount is masked with an AND, check that the mask covers the
4154 // bits that are implicitly ANDed off by the above opcodes and if so, skip
4155 // the AND.
4156 uint64_t MaskImm;
4157 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4158 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4159 return false;
4160
4161 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4162 return false;
4163
4164 NewShiftAmt = ShiftAmt->getOperand(0);
4165 }
4166
4167 // Narrow/widen the shift amount to match the size of the shift operation.
4168 if (VT == MVT::i32)
4169 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4170 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4171 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4172 MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4173 NewShiftAmt, SubReg);
4174 NewShiftAmt = SDValue(Ext, 0);
4175 }
4176
4177 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4178 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4179 return true;
4180}
4181
4183 SDValue &FixedPos,
4184 unsigned RegWidth,
4185 bool isReciprocal) {
4186 APFloat FVal(0.0);
4188 FVal = CN->getValueAPF();
4189 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4190 // Some otherwise illegal constants are allowed in this case.
4191 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4192 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4193 return false;
4194
4195 ConstantPoolSDNode *CN =
4196 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4197 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4198 } else
4199 return false;
4200
4201 if (unsigned FBits =
4202 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4203 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4204 return true;
4205 }
4206
4207 return false;
4208}
4209
// NOTE(review): the listing line that carries this function's return type,
// name and leading parameter(s) is missing directly above; restore it from
// the upstream file before building.
//
// Vector counterpart of the fixed-point scale check. A same-scalar-width
// NVCAST/BITCAST on N is looked through, then a MOVIshift, FMOV or
// constant-operand DUP is decoded into an APFloat of width RegWidth. If
// CheckFixedPointOperandConstant accepts that value, FixedPos is set to an i32
// target constant holding the returned bit count and true is returned.
4211 SDValue N,
4212 SDValue &FixedPos,
4213 unsigned RegWidth,
4214 bool isReciprocal) {
// Look through a cast that keeps the scalar element width unchanged.
4215 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4216 N.getValueType().getScalarSizeInBits() ==
4217 N.getOperand(0).getValueType().getScalarSizeInBits())
4218 N = N.getOperand(0);
4219
// Reinterprets raw immediate bits as an IEEE half/single/double value,
// chosen by RegWidth (16/32/64); any other width is unreachable.
4220 auto ImmToFloat = [RegWidth](APInt Imm) {
4221 switch (RegWidth) {
4222 case 16:
4223 return APFloat(APFloat::IEEEhalf(), Imm);
4224 case 32:
4225 return APFloat(APFloat::IEEEsingle(), Imm);
4226 case 64:
4227 return APFloat(APFloat::IEEEdouble(), Imm);
4228 default:
4229 llvm_unreachable("Unexpected RegWidth!");
4230 };
4231 };
4232
4233 APFloat FVal(0.0);
4234 switch (N->getOpcode()) {
// MOVIshift: the bits are operand 0 shifted left by operand 1.
4235 case AArch64ISD::MOVIshift:
4236 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4237 << N.getConstantOperandVal(1)));
4238 break;
// FMOV: operand 0 is passed through DecodeFMOVImm to obtain the bits.
4239 case AArch64ISD::FMOV:
4240 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4241 break;
// DUP: only accepted when operand 0 is a constant; it is truncated to
// RegWidth bits.
4242 case AArch64ISD::DUP:
4243 if (isa<ConstantSDNode>(N.getOperand(0)))
4244 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4245 else
4246 return false;
4247 break;
4248 default:
4249 return false;
4250 }
4251
// A zero result from CheckFixedPointOperandConstant is treated as "no match".
4252 if (unsigned FBits =
4253 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4254 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4255 return true;
4256 }
4257
4258 return false;
4259}
4260
4261bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4262 unsigned RegWidth) {
4263 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4264 /*isReciprocal*/ false);
4265}
4266
// Vector flavour of the fixed-point scale matcher, non-reciprocal form: the
// call below passes isReciprocal = false.
// NOTE(review): the listing line holding 'return <callee>(' is missing between
// the signature and the argument list; restore it from the upstream file.
4267bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4268 unsigned RegWidth) {
4270 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4271}
4272
// Vector flavour of the fixed-point scale matcher, reciprocal form: the call
// below passes isReciprocal = true.
// NOTE(review): the listing line holding 'return <callee>(' is missing between
// the signature and the argument list; restore it from the upstream file.
4273bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4274 SDValue &FixedPos,
4275 unsigned RegWidth) {
4277 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4278}
4279
4280bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4281 SDValue &FixedPos,
4282 unsigned RegWidth) {
4283 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4284 /*isReciprocal*/ true);
4285}
4286
4287// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4288// of the string and obtains the integer values from them and combines these
4289// into a single value to be used in the MRS/MSR instruction.
4292 RegString.split(Fields, ':');
4293
4294 if (Fields.size() == 1)
4295 return -1;
4296
4297 assert(Fields.size() == 5
4298 && "Invalid number of fields in read register string");
4299
4301 bool AllIntFields = true;
4302
4303 for (StringRef Field : Fields) {
4304 unsigned IntField;
4305 AllIntFields &= !Field.getAsInteger(10, IntField);
4306 Ops.push_back(IntField);
4307 }
4308
4309 assert(AllIntFields &&
4310 "Unexpected non-integer value in special register string.");
4311 (void)AllIntFields;
4312
4313 // Need to combine the integer fields of the string into a single value
4314 // based on the bit encoding of MRS/MSR instruction.
4315 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | (Ops[3] << 3) |
4316 (Ops[4]);
4317}
4318
4319// Lower the read_register intrinsic to an MRS instruction node if the special
4320// register string argument is either of the form detailed in the ALCE (the
4321// form described in getIntOperandsFromRegisterString) or is a named register
4322// known by the MRS SysReg mapper.
4323bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4324 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4325 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4326 SDLoc DL(N);
4327
4328 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4329
4330 unsigned Opcode64Bit = AArch64::MRS;
4331 int Imm = getIntOperandFromRegisterString(RegString->getString());
4332 if (Imm == -1) {
4333 // No match, Use the sysreg mapper to map the remaining possible strings to
4334 // the value for the register to be used for the instruction operand.
4335 const auto *TheReg =
4336 AArch64SysReg::lookupSysRegByName(RegString->getString());
4337 if (TheReg && TheReg->Readable &&
4338 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4339 Imm = TheReg->Encoding;
4340 else
4341 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4342
4343 if (Imm == -1) {
4344 // Still no match, see if this is "pc" or give up.
4345 if (!ReadIs128Bit && RegString->getString() == "pc") {
4346 Opcode64Bit = AArch64::ADR;
4347 Imm = 0;
4348 } else {
4349 // Not a system register. It may name an allocatable 64-bit GPR/FPR read
4350 // by the MSVC __getReg/__getRegFp intrinsics. Emit a pseudo that
4351 // carries the source register as an immediate so the read does not
4352 // reference an undefined physical register (which the machine verifier
4353 // rejects); the AsmPrinter materializes the real mov/fmov.
4354 Register PReg = Subtarget->getTargetLowering()->matchRegisterName(
4355 RegString->getString());
4356 unsigned PseudoOp = 0;
4357 if (AArch64::GPR64RegClass.contains(PReg))
4358 PseudoOp = AArch64::READ_REGISTER_GPR64;
4359 else if (AArch64::FPR64RegClass.contains(PReg))
4360 PseudoOp = AArch64::READ_REGISTER_FPR64;
4361 if (!ReadIs128Bit && PseudoOp && N->getValueType(0) == MVT::i64) {
4362 CurDAG->SelectNodeTo(N, PseudoOp, MVT::i64, MVT::Other,
4363 {CurDAG->getTargetConstant(PReg, DL, MVT::i32),
4364 N->getOperand(0)});
4365 return true;
4366 }
4367 return false;
4368 }
4369 }
4370 }
4371
4372 SDValue InChain = N->getOperand(0);
4373 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4374 if (!ReadIs128Bit) {
4375 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4376 {SysRegImm, InChain});
4377 } else {
4378 SDNode *MRRS = CurDAG->getMachineNode(
4379 AArch64::MRRS, DL,
4380 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4381 {SysRegImm, InChain});
4382
4383 // Sysregs are not endian. The even register always contains the low half
4384 // of the register.
4385 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4386 SDValue(MRRS, 0));
4387 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4388 SDValue(MRRS, 0));
4389 SDValue OutChain = SDValue(MRRS, 1);
4390
4391 ReplaceUses(SDValue(N, 0), Lo);
4392 ReplaceUses(SDValue(N, 1), Hi);
4393 ReplaceUses(SDValue(N, 2), OutChain);
4394 };
4395 return true;
4396}
4397
4398// Lower the write_register intrinsic to an MSR instruction node if the special
4399// register string argument is either of the form detailed in the ALCE (the
4400// form described in getIntOperandsFromRegisterString) or is a named register
4401// known by the MSR SysReg mapper.
4402bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4403 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4404 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4405 SDLoc DL(N);
4406
4407 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4408
4409 if (!WriteIs128Bit) {
4410 // Check if the register was one of those allowed as the pstatefield value
4411 // in the MSR (immediate) instruction. To accept the values allowed in the
4412 // pstatefield for the MSR (immediate) instruction, we also require that an
4413 // immediate value has been provided as an argument, we know that this is
4414 // the case as it has been ensured by semantic checking.
4415 auto trySelectPState = [&](auto PMapper, unsigned State) {
4416 if (PMapper) {
4417 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4418 "Expected a constant integer expression.");
4419 unsigned Reg = PMapper->Encoding;
4420 uint64_t Immed = N->getConstantOperandVal(2);
4421 CurDAG->SelectNodeTo(
4422 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4423 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4424 return true;
4425 }
4426 return false;
4427 };
4428
4429 if (trySelectPState(
4430 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4431 AArch64::MSRpstateImm4))
4432 return true;
4433 if (trySelectPState(
4434 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4435 AArch64::MSRpstateImm1))
4436 return true;
4437 }
4438
4439 int Imm = getIntOperandFromRegisterString(RegString->getString());
4440 if (Imm == -1) {
4441 // Use the sysreg mapper to attempt to map the remaining possible strings
4442 // to the value for the register to be used for the MSR (register)
4443 // instruction operand.
4444 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4445 if (TheReg && TheReg->Writeable &&
4446 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4447 Imm = TheReg->Encoding;
4448 else
4449 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4450
4451 if (Imm == -1) {
4452 // Used by the MSVC __setReg/__setRegFp intrinsics. Copy the value into
4453 // the physical register and keep it live with a FAKE_USE so the write is
4454 // not dead-eliminated. (getRegisterByName rejects allocatable registers,
4455 // so the generic write path cannot handle these.)
4456 Register PReg = Subtarget->getTargetLowering()->matchRegisterName(
4457 RegString->getString());
4458 bool IsGPR = AArch64::GPR64RegClass.contains(PReg);
4459 bool IsFPR = AArch64::FPR64RegClass.contains(PReg);
4460 if (!WriteIs128Bit && (IsGPR || IsFPR) &&
4461 N->getOperand(2).getValueType() == MVT::i64) {
4462 SDValue Copy =
4463 CurDAG->getCopyToReg(N->getOperand(0), DL, PReg, N->getOperand(2));
4464 SDValue RegOp = CurDAG->getRegister(PReg, MVT::i64);
4465 SDNode *FakeUse = CurDAG->getMachineNode(TargetOpcode::FAKE_USE, DL,
4466 MVT::Other, {RegOp, Copy});
4467 ReplaceUses(SDValue(N, 0), SDValue(FakeUse, 0));
4468 CurDAG->RemoveDeadNode(N);
4469 return true;
4470 }
4471 return false;
4472 }
4473 }
4474
4475 SDValue InChain = N->getOperand(0);
4476 if (!WriteIs128Bit) {
4477 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4478 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4479 N->getOperand(2), InChain);
4480 } else {
4481 // No endian swap. The lower half always goes into the even subreg, and the
4482 // higher half always into the odd subreg.
4483 SDNode *Pair = CurDAG->getMachineNode(
4484 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4485 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4486 MVT::i32),
4487 N->getOperand(2),
4488 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4489 N->getOperand(3),
4490 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4491
4492 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4493 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4494 SDValue(Pair, 0), InChain);
4495 }
4496
4497 return true;
4498}
4499
4500/// We've got special pseudo-instructions for these
4501bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4502 unsigned Opcode;
4503 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4504
4505 // Leave IR for LSE if subtarget supports it.
4506 if (Subtarget->hasLSE()) return false;
4507
4508 if (MemTy == MVT::i8)
4509 Opcode = AArch64::CMP_SWAP_8;
4510 else if (MemTy == MVT::i16)
4511 Opcode = AArch64::CMP_SWAP_16;
4512 else if (MemTy == MVT::i32)
4513 Opcode = AArch64::CMP_SWAP_32;
4514 else if (MemTy == MVT::i64)
4515 Opcode = AArch64::CMP_SWAP_64;
4516 else
4517 llvm_unreachable("Unknown AtomicCmpSwap type");
4518
4519 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4520 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4521 N->getOperand(0)};
4522 SDNode *CmpSwap = CurDAG->getMachineNode(
4523 Opcode, SDLoc(N),
4524 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4525
4526 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4527 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4528
4529 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4530 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4531 CurDAG->RemoveDeadNode(N);
4532
4533 return true;
4534}
4535
4536bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4537 SDValue &Shift, bool Negate) {
4538 if (!isa<ConstantSDNode>(N))
4539 return false;
4540
4541 APInt Val =
4542 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4543
4544 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4545}
4546
4547bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4548 SDValue &Imm, SDValue &Shift,
4549 bool Negate) {
4550 if (Negate)
4551 Val = -Val;
4552
4553 switch (VT.SimpleTy) {
4554 case MVT::i8:
4555 // All immediates are supported.
4556 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4557 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4558 return true;
4559 case MVT::i16:
4560 case MVT::i32:
4561 case MVT::i64:
4562 // Support 8bit unsigned immediates.
4563 if ((Val & ~0xff) == 0) {
4564 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4565 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4566 return true;
4567 }
4568 // Support 16bit unsigned immediates that are a multiple of 256.
4569 if ((Val & ~0xff00) == 0) {
4570 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4571 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4572 return true;
4573 }
4574 break;
4575 default:
4576 break;
4577 }
4578
4579 return false;
4580}
4581
4582bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4583 SDValue &Imm, SDValue &Shift,
4584 bool Negate) {
4585 if (!isa<ConstantSDNode>(N))
4586 return false;
4587
4588 SDLoc DL(N);
4589 int64_t Val = cast<ConstantSDNode>(N)
4590 ->getAPIntValue()
4592 .getSExtValue();
4593
4594 if (Negate)
4595 Val = -Val;
4596
4597 // Signed saturating instructions treat their immediate operand as unsigned,
4598 // whereas the related intrinsics define their operands to be signed. This
4599 // means we can only use the immediate form when the operand is non-negative.
4600 if (Val < 0)
4601 return false;
4602
4603 switch (VT.SimpleTy) {
4604 case MVT::i8:
4605 // All positive immediates are supported.
4606 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4607 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4608 return true;
4609 case MVT::i16:
4610 case MVT::i32:
4611 case MVT::i64:
4612 // Support 8bit positive immediates.
4613 if (Val <= 255) {
4614 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4615 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4616 return true;
4617 }
4618 // Support 16bit positive immediates that are a multiple of 256.
4619 if (Val <= 65280 && Val % 256 == 0) {
4620 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4621 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4622 return true;
4623 }
4624 break;
4625 default:
4626 break;
4627 }
4628
4629 return false;
4630}
4631
4632bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4633 SDValue &Shift) {
4634 if (!isa<ConstantSDNode>(N))
4635 return false;
4636
4637 SDLoc DL(N);
4638 int64_t Val = cast<ConstantSDNode>(N)
4639 ->getAPIntValue()
4640 .trunc(VT.getFixedSizeInBits())
4641 .getSExtValue();
4642 int32_t ImmVal, ShiftVal;
4643 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4644 ShiftVal))
4645 return false;
4646
4647 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4648 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4649 return true;
4650}
4651
4652bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4653 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4654 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4655 return false;
4656}
4657
4658bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4659 SDValue &Imm) {
4660 int64_t ImmVal = Val.getSExtValue();
4661 if (ImmVal >= -128 && ImmVal < 128) {
4662 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4663 return true;
4664 }
4665 return false;
4666}
4667
4668bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4669 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4670 uint64_t ImmVal = CNode->getZExtValue();
4671
4672 switch (VT.SimpleTy) {
4673 case MVT::i8:
4674 ImmVal &= 0xFF;
4675 break;
4676 case MVT::i16:
4677 ImmVal &= 0xFFFF;
4678 break;
4679 case MVT::i32:
4680 ImmVal &= 0xFFFFFFFF;
4681 break;
4682 case MVT::i64:
4683 break;
4684 default:
4685 llvm_unreachable("Unexpected type");
4686 }
4687
4688 if (ImmVal < 256) {
4689 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4690 return true;
4691 }
4692 }
4693 return false;
4694}
4695
4696bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4697 bool Invert) {
4698 uint64_t ImmVal;
4699 if (auto CI = dyn_cast<ConstantSDNode>(N))
4700 ImmVal = CI->getZExtValue();
4701 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4702 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4703 else
4704 return false;
4705
4706 if (Invert)
4707 ImmVal = ~ImmVal;
4708
4709 uint64_t encoding;
4710 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4711 return false;
4712
4713 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4714 return true;
4715}
4716
4717// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4718// Rather than attempt to normalise everything we can sometimes saturate the
4719// shift amount during selection. This function also allows for consistent
4720// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4721// required by the instructions.
4722bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4723 uint64_t High, bool AllowSaturation,
4724 SDValue &Imm) {
4725 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4726 uint64_t ImmVal = CN->getZExtValue();
4727
4728 // Reject shift amounts that are too small.
4729 if (ImmVal < Low)
4730 return false;
4731
4732 // Reject or saturate shift amounts that are too big.
4733 if (ImmVal > High) {
4734 if (!AllowSaturation)
4735 return false;
4736 ImmVal = High;
4737 }
4738
4739 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4740 return true;
4741 }
4742
4743 return false;
4744}
4745
4746bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4747 // tagp(FrameIndex, IRGstack, tag_offset):
4748 // since the offset between FrameIndex and IRGstack is a compile-time
4749 // constant, this can be lowered to a single ADDG instruction.
4750 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4751 return false;
4752 }
4753
4754 SDValue IRG_SP = N->getOperand(2);
4755 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4756 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4757 return false;
4758 }
4759
4760 const TargetLowering *TLI = getTargetLowering();
4761 SDLoc DL(N);
4762 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4763 SDValue FiOp = CurDAG->getTargetFrameIndex(
4764 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4765 int TagOffset = N->getConstantOperandVal(3);
4766
4767 SDNode *Out = CurDAG->getMachineNode(
4768 AArch64::TAGPstack, DL, MVT::i64,
4769 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4770 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4771 ReplaceNode(N, Out);
4772 return true;
4773}
4774
4775void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4776 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4777 "llvm.aarch64.tagp third argument must be an immediate");
4778 if (trySelectStackSlotTagP(N))
4779 return;
4780 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4781 // compile-time constant, not just for stack allocations.
4782
4783 // General case for unrelated pointers in Op1 and Op2.
4784 SDLoc DL(N);
4785 int TagOffset = N->getConstantOperandVal(3);
4786 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4787 {N->getOperand(1), N->getOperand(2)});
4788 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4789 {SDValue(N1, 0), N->getOperand(2)});
4790 SDNode *N3 = CurDAG->getMachineNode(
4791 AArch64::ADDG, DL, MVT::i64,
4792 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4793 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4794 ReplaceNode(N, N3);
4795}
4796
4797bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4798 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4799
4800 // Bail when not a "cast" like insert_subvector.
4801 if (N->getConstantOperandVal(2) != 0)
4802 return false;
4803 if (!N->getOperand(0).isUndef())
4804 return false;
4805
4806 // Bail when normal isel should do the job.
4807 EVT VT = N->getValueType(0);
4808 EVT InVT = N->getOperand(1).getValueType();
4809 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4810 return false;
4811 if (InVT.getSizeInBits() <= 128)
4812 return false;
4813
4814 // NOTE: We can only get here when doing fixed length SVE code generation.
4815 // We do manual selection because the types involved are not linked to real
4816 // registers (despite being legal) and must be coerced into SVE registers.
4817
4819 "Expected to insert into a packed scalable vector!");
4820
4821 SDLoc DL(N);
4822 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4823 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4824 N->getOperand(1), RC));
4825 return true;
4826}
4827
4828bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4829 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4830
4831 // Bail when not a "cast" like extract_subvector.
4832 if (N->getConstantOperandVal(1) != 0)
4833 return false;
4834
4835 // Bail when normal isel can do the job.
4836 EVT VT = N->getValueType(0);
4837 EVT InVT = N->getOperand(0).getValueType();
4838 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4839 return false;
4840 if (VT.getSizeInBits() <= 128)
4841 return false;
4842
4843 // NOTE: We can only get here when doing fixed length SVE code generation.
4844 // We do manual selection because the types involved are not linked to real
4845 // registers (despite being legal) and must be coerced into SVE registers.
4846
4848 "Expected to extract from a packed scalable vector!");
4849
4850 SDLoc DL(N);
4851 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4852 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4853 N->getOperand(0), RC));
4854 return true;
4855}
4856
4857bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4858 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4859
4860 SDValue N0 = N->getOperand(0);
4861 SDValue N1 = N->getOperand(1);
4862
4863 EVT VT = N->getValueType(0);
4864 SDLoc DL(N);
4865
4866 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4867 // Rotate by a constant is a funnel shift in IR which is expanded to
4868 // an OR with shifted operands.
4869 // We do the following transform:
4870 // OR N0, N1 -> xar (x, y, imm)
4871 // Where:
4872 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4873 // N0 = SHL_PRED true, V, splat(bits-imm)
4874 // V = (xor x, y)
4875 if (VT.isScalableVector() &&
4876 (Subtarget->hasSVE2() ||
4877 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4878 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4879 N1.getOpcode() != AArch64ISD::SRL_PRED)
4880 std::swap(N0, N1);
4881 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4882 N1.getOpcode() != AArch64ISD::SRL_PRED)
4883 return false;
4884
4885 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4886 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4887 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4888 return false;
4889
4890 if (N0.getOperand(1) != N1.getOperand(1))
4891 return false;
4892
4893 SDValue R1, R2;
4894 bool IsXOROperand = true;
4895 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4896 IsXOROperand = false;
4897 } else {
4898 R1 = N0.getOperand(1).getOperand(0);
4899 R2 = N1.getOperand(1).getOperand(1);
4900 }
4901
4902 APInt ShlAmt, ShrAmt;
4903 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4905 return false;
4906
4907 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4908 return false;
4909
4910 if (!IsXOROperand) {
4911 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4912 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4913 SDValue MOVIV = SDValue(MOV, 0);
4914
4915 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4916 SDNode *SubRegToReg =
4917 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4918
4919 R1 = N1->getOperand(1);
4920 R2 = SDValue(SubRegToReg, 0);
4921 }
4922
4923 SDValue Imm =
4924 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4925
4926 SDValue Ops[] = {R1, R2, Imm};
4928 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4929 AArch64::XAR_ZZZI_D})) {
4930 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4931 return true;
4932 }
4933 return false;
4934 }
4935
4936 // We have Neon SHA3 XAR operation for v2i64 but for types
4937 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4938 // is available.
4939 EVT SVT;
4940 switch (VT.getSimpleVT().SimpleTy) {
4941 case MVT::v4i32:
4942 case MVT::v2i32:
4943 SVT = MVT::nxv4i32;
4944 break;
4945 case MVT::v8i16:
4946 case MVT::v4i16:
4947 SVT = MVT::nxv8i16;
4948 break;
4949 case MVT::v16i8:
4950 case MVT::v8i8:
4951 SVT = MVT::nxv16i8;
4952 break;
4953 case MVT::v2i64:
4954 case MVT::v1i64:
4955 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4956 break;
4957 default:
4958 return false;
4959 }
4960
4961 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4962 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4963 return false;
4964
4965 if (N0->getOpcode() != AArch64ISD::VSHL ||
4966 N1->getOpcode() != AArch64ISD::VLSHR)
4967 return false;
4968
4969 if (N0->getOperand(0) != N1->getOperand(0))
4970 return false;
4971
4972 SDValue R1, R2;
4973 bool IsXOROperand = true;
4974 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4975 IsXOROperand = false;
4976 } else {
4977 SDValue XOR = N0.getOperand(0);
4978 R1 = XOR.getOperand(0);
4979 R2 = XOR.getOperand(1);
4980 }
4981
4982 unsigned HsAmt = N0.getConstantOperandVal(1);
4983 unsigned ShAmt = N1.getConstantOperandVal(1);
4984
4985 SDValue Imm = CurDAG->getTargetConstant(
4986 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4987
4988 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4989 if (ShAmt + HsAmt != VTSizeInBits)
4990 return false;
4991
4992 if (!IsXOROperand) {
4993 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4994 SDNode *MOV =
4995 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4996 SDValue MOVIV = SDValue(MOV, 0);
4997
4998 R1 = N1->getOperand(0);
4999 R2 = MOVIV;
5000 }
5001
5002 if (SVT != VT) {
5003 SDValue Undef =
5004 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
5005
5006 if (SVT.isScalableVector() && VT.is64BitVector()) {
5007 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
5008
5009 SDValue UndefQ = SDValue(
5010 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
5011 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
5012
5013 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
5014 UndefQ, R1, DSub),
5015 0);
5016 if (R2.getValueType() == VT)
5017 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
5018 UndefQ, R2, DSub),
5019 0);
5020 }
5021
5022 SDValue SubReg = CurDAG->getTargetConstant(
5023 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
5024
5025 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
5026 R1, SubReg),
5027 0);
5028
5029 if (SVT.isScalableVector() || R2.getValueType() != SVT)
5030 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
5031 Undef, R2, SubReg),
5032 0);
5033 }
5034
5035 SDValue Ops[] = {R1, R2, Imm};
5036 SDNode *XAR = nullptr;
5037
5038 if (SVT.isScalableVector()) {
5040 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
5041 AArch64::XAR_ZZZI_D}))
5042 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
5043 } else {
5044 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
5045 }
5046
5047 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
5048
5049 if (SVT != VT) {
5050 if (VT.is64BitVector() && SVT.isScalableVector()) {
5051 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
5052
5053 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
5054 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
5055 SDValue(XAR, 0), ZSub);
5056
5057 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
5058 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
5059 SDValue(Q, 0), DSub);
5060 } else {
5061 SDValue SubReg = CurDAG->getTargetConstant(
5062 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
5063 MVT::i32);
5064 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
5065 SDValue(XAR, 0), SubReg);
5066 }
5067 }
5068 ReplaceNode(N, XAR);
5069 return true;
5070}
5071
5072/// Returns a copy from WZR or XZR. This can be used during instruction
5073/// selection (it does not require any further selection/legalization).
5075 assert(VT == MVT::i32 || VT == MVT::i64);
5076 return DAG.getCopyFromReg(DAG.getEntryNode(), DL,
5077 VT == MVT::i32 ? AArch64::WZR : AArch64::XZR, VT);
5078}
5079
5080void AArch64DAGToDAGISel::Select(SDNode *Node) {
5081 // If we have a custom node, we already have selected!
5082 if (Node->isMachineOpcode()) {
5083 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
5084 Node->setNodeId(-1);
5085 return;
5086 }
5087
5088 // Few custom selection stuff.
5089 EVT VT = Node->getValueType(0);
5090
5091 switch (Node->getOpcode()) {
5092 default:
5093 break;
5094
5096 if (SelectCMP_SWAP(Node))
5097 return;
5098 break;
5099
5100 case ISD::READ_REGISTER:
5101 case AArch64ISD::MRRS:
5102 if (tryReadRegister(Node))
5103 return;
5104 break;
5105
5107 case AArch64ISD::MSRR:
5108 if (tryWriteRegister(Node))
5109 return;
5110 break;
5111
5112 case ISD::LOAD: {
5113 // Try to select as an indexed load. Fall through to normal processing
5114 // if we can't.
5115 if (tryIndexedLoad(Node))
5116 return;
5117 break;
5118 }
5119
5120 case ISD::SRL:
5121 case ISD::AND:
5122 case ISD::SRA:
5124 if (tryBitfieldExtractOp(Node))
5125 return;
5126 if (tryBitfieldInsertInZeroOp(Node))
5127 return;
5128 [[fallthrough]];
5129 case ISD::ROTR:
5130 case ISD::SHL:
5131 if (tryShiftAmountMod(Node))
5132 return;
5133 break;
5134
5135 case ISD::SIGN_EXTEND:
5136 if (tryBitfieldExtractOpFromSExt(Node))
5137 return;
5138 break;
5139
5140 case ISD::OR:
5141 if (tryBitfieldInsertOp(Node))
5142 return;
5143 if (trySelectXAR(Node))
5144 return;
5145 break;
5146
5148 if (trySelectCastScalableToFixedLengthVector(Node))
5149 return;
5150 break;
5151 }
5152
5153 case ISD::INSERT_SUBVECTOR: {
5154 if (trySelectCastFixedLengthToScalableVector(Node))
5155 return;
5156 break;
5157 }
5158
5159 case ISD::Constant: {
5160 // Materialize zero constants as copies from WZR/XZR. This allows
5161 // the coalescer to propagate these into other instructions.
5162 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5163 if (ConstNode->isZero() && (VT == MVT::i32 || VT == MVT::i64)) {
5164 ReplaceNode(Node, getZeroRegister(*CurDAG, SDLoc(Node), VT).getNode());
5165 return;
5166 }
5167 break;
5168 }
5169
5170 case ISD::FrameIndex: {
5171 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5172 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5173 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5174 const TargetLowering *TLI = getTargetLowering();
5175 SDValue TFI = CurDAG->getTargetFrameIndex(
5176 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5177 SDLoc DL(Node);
5178 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5179 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5180 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5181 return;
5182 }
5184 unsigned IntNo = Node->getConstantOperandVal(1);
5185 switch (IntNo) {
5186 default:
5187 break;
5188 case Intrinsic::aarch64_gcsss: {
5189 SDLoc DL(Node);
5190 SDValue Chain = Node->getOperand(0);
5191 SDValue Val = Node->getOperand(2);
5192 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5193 SDNode *SS1 =
5194 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5195 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5196 MVT::Other, Zero, SDValue(SS1, 0));
5197 ReplaceNode(Node, SS2);
5198 return;
5199 }
5200 case Intrinsic::aarch64_ldaxp:
5201 case Intrinsic::aarch64_ldxp: {
5202 unsigned Op =
5203 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5204 SDValue MemAddr = Node->getOperand(2);
5205 SDLoc DL(Node);
5206 SDValue Chain = Node->getOperand(0);
5207
5208 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5209 MVT::Other, MemAddr, Chain);
5210
5211 // Transfer memoperands.
5212 MachineMemOperand *MemOp =
5213 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5214 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5215 ReplaceNode(Node, Ld);
5216 return;
5217 }
5218 case Intrinsic::aarch64_stlxp:
5219 case Intrinsic::aarch64_stxp: {
5220 unsigned Op =
5221 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5222 SDLoc DL(Node);
5223 SDValue Chain = Node->getOperand(0);
5224 SDValue ValLo = Node->getOperand(2);
5225 SDValue ValHi = Node->getOperand(3);
5226 SDValue MemAddr = Node->getOperand(4);
5227
5228 // Place arguments in the right order.
5229 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5230
5231 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5232 // Transfer memoperands.
5233 MachineMemOperand *MemOp =
5234 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5235 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5236
5237 ReplaceNode(Node, St);
5238 return;
5239 }
5240 case Intrinsic::aarch64_neon_ld1x2:
5241 if (VT == MVT::v8i8) {
5242 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5243 return;
5244 } else if (VT == MVT::v16i8) {
5245 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5246 return;
5247 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5248 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5249 return;
5250 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5251 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5252 return;
5253 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5254 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5255 return;
5256 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5257 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5258 return;
5259 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5260 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5261 return;
5262 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5263 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5264 return;
5265 }
5266 break;
5267 case Intrinsic::aarch64_neon_ld1x3:
5268 if (VT == MVT::v8i8) {
5269 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5270 return;
5271 } else if (VT == MVT::v16i8) {
5272 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5273 return;
5274 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5275 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5276 return;
5277 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5278 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5279 return;
5280 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5281 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5282 return;
5283 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5284 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5285 return;
5286 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5287 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5288 return;
5289 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5290 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5291 return;
5292 }
5293 break;
5294 case Intrinsic::aarch64_neon_ld1x4:
5295 if (VT == MVT::v8i8) {
5296 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5297 return;
5298 } else if (VT == MVT::v16i8) {
5299 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5300 return;
5301 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5302 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5303 return;
5304 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5305 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5306 return;
5307 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5308 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5309 return;
5310 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5311 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5312 return;
5313 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5314 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5315 return;
5316 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5317 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5318 return;
5319 }
5320 break;
5321 case Intrinsic::aarch64_neon_ld2:
5322 if (VT == MVT::v8i8) {
5323 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5324 return;
5325 } else if (VT == MVT::v16i8) {
5326 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5327 return;
5328 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5329 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5330 return;
5331 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5332 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5333 return;
5334 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5335 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5336 return;
5337 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5338 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5339 return;
5340 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5341 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5342 return;
5343 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5344 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5345 return;
5346 }
5347 break;
5348 case Intrinsic::aarch64_neon_ld3:
5349 if (VT == MVT::v8i8) {
5350 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5351 return;
5352 } else if (VT == MVT::v16i8) {
5353 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5354 return;
5355 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5356 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5357 return;
5358 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5359 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5360 return;
5361 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5362 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5363 return;
5364 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5365 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5366 return;
5367 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5368 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5369 return;
5370 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5371 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5372 return;
5373 }
5374 break;
5375 case Intrinsic::aarch64_neon_ld4:
5376 if (VT == MVT::v8i8) {
5377 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5378 return;
5379 } else if (VT == MVT::v16i8) {
5380 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5381 return;
5382 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5383 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5384 return;
5385 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5386 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5387 return;
5388 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5389 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5390 return;
5391 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5392 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5393 return;
5394 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5395 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5396 return;
5397 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5398 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5399 return;
5400 }
5401 break;
5402 case Intrinsic::aarch64_neon_ld2r:
5403 if (VT == MVT::v8i8) {
5404 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5405 return;
5406 } else if (VT == MVT::v16i8) {
5407 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5408 return;
5409 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5410 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5411 return;
5412 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5413 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5414 return;
5415 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5416 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5417 return;
5418 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5419 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5420 return;
5421 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5422 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5423 return;
5424 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5425 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5426 return;
5427 }
5428 break;
5429 case Intrinsic::aarch64_neon_ld3r:
5430 if (VT == MVT::v8i8) {
5431 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5432 return;
5433 } else if (VT == MVT::v16i8) {
5434 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5435 return;
5436 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5437 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5438 return;
5439 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5440 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5441 return;
5442 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5443 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5444 return;
5445 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5446 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5447 return;
5448 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5449 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5450 return;
5451 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5452 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5453 return;
5454 }
5455 break;
5456 case Intrinsic::aarch64_neon_ld4r:
5457 if (VT == MVT::v8i8) {
5458 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5459 return;
5460 } else if (VT == MVT::v16i8) {
5461 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5462 return;
5463 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5464 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5465 return;
5466 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5467 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5468 return;
5469 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5470 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5471 return;
5472 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5473 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5474 return;
5475 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5476 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5477 return;
5478 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5479 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5480 return;
5481 }
5482 break;
5483 case Intrinsic::aarch64_neon_ld2lane:
5484 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5485 SelectLoadLane(Node, 2, AArch64::LD2i8);
5486 return;
5487 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5488 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5489 SelectLoadLane(Node, 2, AArch64::LD2i16);
5490 return;
5491 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5492 VT == MVT::v2f32) {
5493 SelectLoadLane(Node, 2, AArch64::LD2i32);
5494 return;
5495 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5496 VT == MVT::v1f64) {
5497 SelectLoadLane(Node, 2, AArch64::LD2i64);
5498 return;
5499 }
5500 break;
5501 case Intrinsic::aarch64_neon_ld3lane:
5502 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5503 SelectLoadLane(Node, 3, AArch64::LD3i8);
5504 return;
5505 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5506 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5507 SelectLoadLane(Node, 3, AArch64::LD3i16);
5508 return;
5509 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5510 VT == MVT::v2f32) {
5511 SelectLoadLane(Node, 3, AArch64::LD3i32);
5512 return;
5513 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5514 VT == MVT::v1f64) {
5515 SelectLoadLane(Node, 3, AArch64::LD3i64);
5516 return;
5517 }
5518 break;
5519 case Intrinsic::aarch64_neon_ld4lane:
5520 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5521 SelectLoadLane(Node, 4, AArch64::LD4i8);
5522 return;
5523 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5524 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5525 SelectLoadLane(Node, 4, AArch64::LD4i16);
5526 return;
5527 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5528 VT == MVT::v2f32) {
5529 SelectLoadLane(Node, 4, AArch64::LD4i32);
5530 return;
5531 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5532 VT == MVT::v1f64) {
5533 SelectLoadLane(Node, 4, AArch64::LD4i64);
5534 return;
5535 }
5536 break;
5537 case Intrinsic::aarch64_ld64b:
5538 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5539 return;
5540 case Intrinsic::aarch64_sve_ld2q_sret: {
5541 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5542 return;
5543 }
5544 case Intrinsic::aarch64_sve_ld3q_sret: {
5545 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5546 return;
5547 }
5548 case Intrinsic::aarch64_sve_ld4q_sret: {
5549 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5550 return;
5551 }
5552 case Intrinsic::aarch64_sve_ld2_sret: {
5553 if (VT == MVT::nxv16i8) {
5554 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5555 true);
5556 return;
5557 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5558 VT == MVT::nxv8bf16) {
5559 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5560 true);
5561 return;
5562 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5563 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5564 true);
5565 return;
5566 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5567 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5568 true);
5569 return;
5570 }
5571 break;
5572 }
5573 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5574 if (VT == MVT::nxv16i8) {
5575 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5576 SelectContiguousMultiVectorLoad(
5577 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5578 else if (Subtarget->hasSVE2p1())
5579 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5580 AArch64::LD1B_2Z);
5581 else
5582 break;
5583 return;
5584 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5585 VT == MVT::nxv8bf16) {
5586 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5587 SelectContiguousMultiVectorLoad(
5588 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5589 else if (Subtarget->hasSVE2p1())
5590 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5591 AArch64::LD1H_2Z);
5592 else
5593 break;
5594 return;
5595 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5596 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5597 SelectContiguousMultiVectorLoad(
5598 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5599 else if (Subtarget->hasSVE2p1())
5600 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5601 AArch64::LD1W_2Z);
5602 else
5603 break;
5604 return;
5605 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5606 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5607 SelectContiguousMultiVectorLoad(
5608 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5609 else if (Subtarget->hasSVE2p1())
5610 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5611 AArch64::LD1D_2Z);
5612 else
5613 break;
5614 return;
5615 }
5616 break;
5617 }
5618 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5619 if (VT == MVT::nxv16i8) {
5620 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5621 SelectContiguousMultiVectorLoad(
5622 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5623 else if (Subtarget->hasSVE2p1())
5624 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5625 AArch64::LD1B_4Z);
5626 else
5627 break;
5628 return;
5629 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5630 VT == MVT::nxv8bf16) {
5631 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5632 SelectContiguousMultiVectorLoad(
5633 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5634 else if (Subtarget->hasSVE2p1())
5635 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5636 AArch64::LD1H_4Z);
5637 else
5638 break;
5639 return;
5640 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5641 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5642 SelectContiguousMultiVectorLoad(
5643 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5644 else if (Subtarget->hasSVE2p1())
5645 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5646 AArch64::LD1W_4Z);
5647 else
5648 break;
5649 return;
5650 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5651 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5652 SelectContiguousMultiVectorLoad(
5653 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5654 else if (Subtarget->hasSVE2p1())
5655 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5656 AArch64::LD1D_4Z);
5657 else
5658 break;
5659 return;
5660 }
5661 break;
5662 }
5663 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5664 if (VT == MVT::nxv16i8) {
5665 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5666 SelectContiguousMultiVectorLoad(Node, 2, 0,
5667 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5668 AArch64::LDNT1B_2Z_PSEUDO);
5669 else if (Subtarget->hasSVE2p1())
5670 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5671 AArch64::LDNT1B_2Z);
5672 else
5673 break;
5674 return;
5675 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5676 VT == MVT::nxv8bf16) {
5677 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5678 SelectContiguousMultiVectorLoad(Node, 2, 1,
5679 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5680 AArch64::LDNT1H_2Z_PSEUDO);
5681 else if (Subtarget->hasSVE2p1())
5682 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5683 AArch64::LDNT1H_2Z);
5684 else
5685 break;
5686 return;
5687 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5688 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5689 SelectContiguousMultiVectorLoad(Node, 2, 2,
5690 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5691 AArch64::LDNT1W_2Z_PSEUDO);
5692 else if (Subtarget->hasSVE2p1())
5693 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5694 AArch64::LDNT1W_2Z);
5695 else
5696 break;
5697 return;
5698 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5699 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5700 SelectContiguousMultiVectorLoad(Node, 2, 3,
5701 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5702 AArch64::LDNT1D_2Z_PSEUDO);
5703 else if (Subtarget->hasSVE2p1())
5704 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5705 AArch64::LDNT1D_2Z);
5706 else
5707 break;
5708 return;
5709 }
5710 break;
5711 }
5712 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5713 if (VT == MVT::nxv16i8) {
5714 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5715 SelectContiguousMultiVectorLoad(Node, 4, 0,
5716 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5717 AArch64::LDNT1B_4Z_PSEUDO);
5718 else if (Subtarget->hasSVE2p1())
5719 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5720 AArch64::LDNT1B_4Z);
5721 else
5722 break;
5723 return;
5724 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5725 VT == MVT::nxv8bf16) {
5726 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5727 SelectContiguousMultiVectorLoad(Node, 4, 1,
5728 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5729 AArch64::LDNT1H_4Z_PSEUDO);
5730 else if (Subtarget->hasSVE2p1())
5731 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5732 AArch64::LDNT1H_4Z);
5733 else
5734 break;
5735 return;
5736 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5737 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5738 SelectContiguousMultiVectorLoad(Node, 4, 2,
5739 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5740 AArch64::LDNT1W_4Z_PSEUDO);
5741 else if (Subtarget->hasSVE2p1())
5742 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5743 AArch64::LDNT1W_4Z);
5744 else
5745 break;
5746 return;
5747 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5748 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5749 SelectContiguousMultiVectorLoad(Node, 4, 3,
5750 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5751 AArch64::LDNT1D_4Z_PSEUDO);
5752 else if (Subtarget->hasSVE2p1())
5753 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5754 AArch64::LDNT1D_4Z);
5755 else
5756 break;
5757 return;
5758 }
5759 break;
5760 }
5761 case Intrinsic::aarch64_sve_ld3_sret: {
5762 if (VT == MVT::nxv16i8) {
5763 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5764 true);
5765 return;
5766 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5767 VT == MVT::nxv8bf16) {
5768 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5769 true);
5770 return;
5771 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5772 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5773 true);
5774 return;
5775 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5776 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5777 true);
5778 return;
5779 }
5780 break;
5781 }
5782 case Intrinsic::aarch64_sve_ld4_sret: {
5783 if (VT == MVT::nxv16i8) {
5784 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5785 true);
5786 return;
5787 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5788 VT == MVT::nxv8bf16) {
5789 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5790 true);
5791 return;
5792 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5793 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5794 true);
5795 return;
5796 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5797 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5798 true);
5799 return;
5800 }
5801 break;
5802 }
5803 case Intrinsic::aarch64_sme_read_hor_vg2: {
5804 if (VT == MVT::nxv16i8) {
5805 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5806 AArch64::MOVA_2ZMXI_H_B);
5807 return;
5808 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5809 VT == MVT::nxv8bf16) {
5810 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5811 AArch64::MOVA_2ZMXI_H_H);
5812 return;
5813 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5814 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5815 AArch64::MOVA_2ZMXI_H_S);
5816 return;
5817 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5818 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5819 AArch64::MOVA_2ZMXI_H_D);
5820 return;
5821 }
5822 break;
5823 }
5824 case Intrinsic::aarch64_sme_read_ver_vg2: {
5825 if (VT == MVT::nxv16i8) {
5826 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5827 AArch64::MOVA_2ZMXI_V_B);
5828 return;
5829 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5830 VT == MVT::nxv8bf16) {
5831 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5832 AArch64::MOVA_2ZMXI_V_H);
5833 return;
5834 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5835 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5836 AArch64::MOVA_2ZMXI_V_S);
5837 return;
5838 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5839 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5840 AArch64::MOVA_2ZMXI_V_D);
5841 return;
5842 }
5843 break;
5844 }
5845 case Intrinsic::aarch64_sme_read_hor_vg4: {
5846 if (VT == MVT::nxv16i8) {
5847 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5848 AArch64::MOVA_4ZMXI_H_B);
5849 return;
5850 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5851 VT == MVT::nxv8bf16) {
5852 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5853 AArch64::MOVA_4ZMXI_H_H);
5854 return;
5855 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5856 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5857 AArch64::MOVA_4ZMXI_H_S);
5858 return;
5859 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5860 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5861 AArch64::MOVA_4ZMXI_H_D);
5862 return;
5863 }
5864 break;
5865 }
5866 case Intrinsic::aarch64_sme_read_ver_vg4: {
5867 if (VT == MVT::nxv16i8) {
5868 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5869 AArch64::MOVA_4ZMXI_V_B);
5870 return;
5871 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5872 VT == MVT::nxv8bf16) {
5873 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5874 AArch64::MOVA_4ZMXI_V_H);
5875 return;
5876 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5877 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5878 AArch64::MOVA_4ZMXI_V_S);
5879 return;
5880 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5881 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5882 AArch64::MOVA_4ZMXI_V_D);
5883 return;
5884 }
5885 break;
5886 }
5887 case Intrinsic::aarch64_sme_read_vg1x2: {
5888 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5889 AArch64::MOVA_VG2_2ZMXI);
5890 return;
5891 }
5892 case Intrinsic::aarch64_sme_read_vg1x4: {
5893 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5894 AArch64::MOVA_VG4_4ZMXI);
5895 return;
5896 }
5897 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5898 if (VT == MVT::nxv16i8) {
5899 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5900 return;
5901 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5902 VT == MVT::nxv8bf16) {
5903 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5904 return;
5905 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5906 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5907 return;
5908 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5909 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5910 return;
5911 }
5912 break;
5913 }
5914 case Intrinsic::aarch64_sme_readz_vert_x2: {
5915 if (VT == MVT::nxv16i8) {
5916 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5917 return;
5918 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5919 VT == MVT::nxv8bf16) {
5920 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5921 return;
5922 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5923 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5924 return;
5925 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5926 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5927 return;
5928 }
5929 break;
5930 }
5931 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5932 if (VT == MVT::nxv16i8) {
5933 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5934 return;
5935 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5936 VT == MVT::nxv8bf16) {
5937 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5938 return;
5939 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5940 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5941 return;
5942 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5943 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5944 return;
5945 }
5946 break;
5947 }
5948 case Intrinsic::aarch64_sme_readz_vert_x4: {
5949 if (VT == MVT::nxv16i8) {
5950 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5951 return;
5952 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5953 VT == MVT::nxv8bf16) {
5954 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5955 return;
5956 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5957 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5958 return;
5959 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5960 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5961 return;
5962 }
5963 break;
5964 }
5965 case Intrinsic::aarch64_sme_readz_x2: {
5966 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5967 AArch64::ZA);
5968 return;
5969 }
5970 case Intrinsic::aarch64_sme_readz_x4: {
5971 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5972 AArch64::ZA);
5973 return;
5974 }
5975 case Intrinsic::swift_async_context_addr: {
5976 SDLoc DL(Node);
5977 SDValue Chain = Node->getOperand(0);
5978 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5979 SDValue Res = SDValue(
5980 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5981 CurDAG->getTargetConstant(8, DL, MVT::i32),
5982 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5983 0);
5984 ReplaceUses(SDValue(Node, 0), Res);
5985 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5986 CurDAG->RemoveDeadNode(Node);
5987
5988 auto &MF = CurDAG->getMachineFunction();
5989 MF.getFrameInfo().setFrameAddressIsTaken(true);
5990 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5991 return;
5992 }
5993 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5995 Node->getValueType(0),
5996 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5997 AArch64::LUTI2_4ZTZI_S}))
5998 // Second Immediate must be <= 3:
5999 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
6000 return;
6001 }
6002 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
6004 Node->getValueType(0),
6005 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
6006 // Second Immediate must be <= 1:
6007 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
6008 return;
6009 }
6010 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
6012 Node->getValueType(0),
6013 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
6014 AArch64::LUTI2_2ZTZI_S}))
6015 // Second Immediate must be <= 7:
6016 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
6017 return;
6018 }
6019 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
6021 Node->getValueType(0),
6022 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
6023 AArch64::LUTI4_2ZTZI_S}))
6024 // Second Immediate must be <= 3:
6025 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
6026 return;
6027 }
6028 case Intrinsic::aarch64_sme_luti4_zt_x4: {
6029 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 2);
6030 return;
6031 }
6032 case Intrinsic::aarch64_sme_luti6_zt_x4: {
6033 SelectMultiVectorLuti(Node, 4, AArch64::LUTI6_4ZT3Z, 3);
6034 return;
6035 }
6036 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
6038 Node->getValueType(0),
6039 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
6040 SelectCVTIntrinsicFP8(Node, 2, Opc);
6041 return;
6042 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
6044 Node->getValueType(0),
6045 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
6046 SelectCVTIntrinsicFP8(Node, 2, Opc);
6047 return;
6048 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
6050 Node->getValueType(0),
6051 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
6052 SelectCVTIntrinsicFP8(Node, 2, Opc);
6053 return;
6054 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
6056 Node->getValueType(0),
6057 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
6058 SelectCVTIntrinsicFP8(Node, 2, Opc);
6059 return;
6060 case Intrinsic::ptrauth_resign_load_relative:
6061 SelectPtrauthResign(Node);
6062 return;
6063 }
6064 } break;
6066 unsigned IntNo = Node->getConstantOperandVal(0);
6067 switch (IntNo) {
6068 default:
6069 break;
6070 case Intrinsic::aarch64_tagp:
6071 SelectTagP(Node);
6072 return;
6073
6074 case Intrinsic::ptrauth_auth:
6075 SelectPtrauthAuth(Node);
6076 return;
6077
6078 case Intrinsic::ptrauth_resign:
6079 SelectPtrauthResign(Node);
6080 return;
6081
6082 case Intrinsic::aarch64_neon_tbl2:
6083 SelectTable(Node, 2,
6084 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
6085 false);
6086 return;
6087 case Intrinsic::aarch64_neon_tbl3:
6088 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
6089 : AArch64::TBLv16i8Three,
6090 false);
6091 return;
6092 case Intrinsic::aarch64_neon_tbl4:
6093 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
6094 : AArch64::TBLv16i8Four,
6095 false);
6096 return;
6097 case Intrinsic::aarch64_neon_tbx2:
6098 SelectTable(Node, 2,
6099 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
6100 true);
6101 return;
6102 case Intrinsic::aarch64_neon_tbx3:
6103 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
6104 : AArch64::TBXv16i8Three,
6105 true);
6106 return;
6107 case Intrinsic::aarch64_neon_tbx4:
6108 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
6109 : AArch64::TBXv16i8Four,
6110 true);
6111 return;
6112 case Intrinsic::aarch64_sve_srshl_single_x2:
6114 Node->getValueType(0),
6115 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
6116 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6117 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6118 return;
6119 case Intrinsic::aarch64_sve_srshl_single_x4:
6121 Node->getValueType(0),
6122 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6123 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6124 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6125 return;
6126 case Intrinsic::aarch64_sme_luti6_lane_x4_x2:
6127 SelectMultiVectorLuti6LaneX4(Node, 2);
6128 return;
6129 case Intrinsic::aarch64_sme_luti6_lane_x4_x3:
6130 SelectMultiVectorLuti6LaneX4(Node, 3);
6131 return;
6132 case Intrinsic::aarch64_sve_urshl_single_x2:
6134 Node->getValueType(0),
6135 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6136 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6137 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6138 return;
6139 case Intrinsic::aarch64_sve_urshl_single_x4:
6141 Node->getValueType(0),
6142 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6143 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6144 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6145 return;
6146 case Intrinsic::aarch64_sve_srshl_x2:
6148 Node->getValueType(0),
6149 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6150 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6151 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6152 return;
6153 case Intrinsic::aarch64_sve_srshl_x4:
6155 Node->getValueType(0),
6156 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6157 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6158 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6159 return;
6160 case Intrinsic::aarch64_sve_urshl_x2:
6162 Node->getValueType(0),
6163 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6164 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6165 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6166 return;
6167 case Intrinsic::aarch64_sve_urshl_x4:
6169 Node->getValueType(0),
6170 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6171 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6172 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6173 return;
6174 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6176 Node->getValueType(0),
6177 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6178 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6179 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6183 Node->getValueType(0),
6184 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6185 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6186 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6190 Node->getValueType(0),
6191 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6192 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6193 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6194 return;
6195 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6197 Node->getValueType(0),
6198 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6199 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6200 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6201 return;
6202 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6204 Node->getValueType(0),
6205 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6206 AArch64::FSCALE_2ZZ_D}))
6207 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6208 return;
6209 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6211 Node->getValueType(0),
6212 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6213 AArch64::FSCALE_4ZZ_D}))
6214 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6215 return;
6216 case Intrinsic::aarch64_sme_fp8_scale_x2:
6218 Node->getValueType(0),
6219 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6220 AArch64::FSCALE_2Z2Z_D}))
6221 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6222 return;
6223 case Intrinsic::aarch64_sme_fp8_scale_x4:
6225 Node->getValueType(0),
6226 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6227 AArch64::FSCALE_4Z4Z_D}))
6228 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6229 return;
6230 case Intrinsic::aarch64_sve_whilege_x2:
6232 Node->getValueType(0),
6233 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6234 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6235 SelectWhilePair(Node, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_whilegt_x2:
6239 Node->getValueType(0),
6240 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6241 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6242 SelectWhilePair(Node, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_whilehi_x2:
6246 Node->getValueType(0),
6247 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6248 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6249 SelectWhilePair(Node, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_whilehs_x2:
6253 Node->getValueType(0),
6254 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6255 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6256 SelectWhilePair(Node, Op);
6257 return;
6258 case Intrinsic::aarch64_sve_whilele_x2:
6260 Node->getValueType(0),
6261 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6262 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6263 SelectWhilePair(Node, Op);
6264 return;
6265 case Intrinsic::aarch64_sve_whilelo_x2:
6267 Node->getValueType(0),
6268 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6269 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6270 SelectWhilePair(Node, Op);
6271 return;
6272 case Intrinsic::aarch64_sve_whilels_x2:
6274 Node->getValueType(0),
6275 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6276 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6277 SelectWhilePair(Node, Op);
6278 return;
6279 case Intrinsic::aarch64_sve_whilelt_x2:
6281 Node->getValueType(0),
6282 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6283 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6284 SelectWhilePair(Node, Op);
6285 return;
6286 case Intrinsic::aarch64_sve_smax_single_x2:
6288 Node->getValueType(0),
6289 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6290 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6291 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6292 return;
6293 case Intrinsic::aarch64_sve_umax_single_x2:
6295 Node->getValueType(0),
6296 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6297 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6298 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6299 return;
6300 case Intrinsic::aarch64_sve_fmax_single_x2:
6302 Node->getValueType(0),
6303 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6304 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6305 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6306 return;
6307 case Intrinsic::aarch64_sve_smax_single_x4:
6309 Node->getValueType(0),
6310 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6311 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6312 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6313 return;
6314 case Intrinsic::aarch64_sve_umax_single_x4:
6316 Node->getValueType(0),
6317 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6318 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6319 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6320 return;
6321 case Intrinsic::aarch64_sve_fmax_single_x4:
6323 Node->getValueType(0),
6324 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6325 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6326 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6327 return;
6328 case Intrinsic::aarch64_sve_smin_single_x2:
6330 Node->getValueType(0),
6331 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6332 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6333 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6334 return;
6335 case Intrinsic::aarch64_sve_umin_single_x2:
6337 Node->getValueType(0),
6338 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6339 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6340 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6341 return;
6342 case Intrinsic::aarch64_sve_fmin_single_x2:
6344 Node->getValueType(0),
6345 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6346 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6347 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6348 return;
6349 case Intrinsic::aarch64_sve_smin_single_x4:
6351 Node->getValueType(0),
6352 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6353 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6354 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6355 return;
6356 case Intrinsic::aarch64_sve_umin_single_x4:
6358 Node->getValueType(0),
6359 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6360 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6361 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6362 return;
6363 case Intrinsic::aarch64_sve_fmin_single_x4:
6365 Node->getValueType(0),
6366 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6367 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6368 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6369 return;
6370 case Intrinsic::aarch64_sve_smax_x2:
6372 Node->getValueType(0),
6373 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6374 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6375 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6376 return;
6377 case Intrinsic::aarch64_sve_umax_x2:
6379 Node->getValueType(0),
6380 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6381 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6382 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6383 return;
6384 case Intrinsic::aarch64_sve_fmax_x2:
6386 Node->getValueType(0),
6387 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6388 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6389 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6390 return;
6391 case Intrinsic::aarch64_sve_smax_x4:
6393 Node->getValueType(0),
6394 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6395 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6396 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6397 return;
6398 case Intrinsic::aarch64_sve_umax_x4:
6400 Node->getValueType(0),
6401 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6402 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6403 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6404 return;
6405 case Intrinsic::aarch64_sve_fmax_x4:
6407 Node->getValueType(0),
6408 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6409 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6410 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6411 return;
6412 case Intrinsic::aarch64_sme_famax_x2:
6414 Node->getValueType(0),
6415 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6416 AArch64::FAMAX_2Z2Z_D}))
6417 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6418 return;
6419 case Intrinsic::aarch64_sme_famax_x4:
6421 Node->getValueType(0),
6422 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6423 AArch64::FAMAX_4Z4Z_D}))
6424 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6425 return;
6426 case Intrinsic::aarch64_sme_famin_x2:
6428 Node->getValueType(0),
6429 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6430 AArch64::FAMIN_2Z2Z_D}))
6431 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6432 return;
6433 case Intrinsic::aarch64_sme_famin_x4:
6435 Node->getValueType(0),
6436 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6437 AArch64::FAMIN_4Z4Z_D}))
6438 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6439 return;
6440 case Intrinsic::aarch64_sve_smin_x2:
6442 Node->getValueType(0),
6443 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6444 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6445 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6446 return;
6447 case Intrinsic::aarch64_sve_umin_x2:
6449 Node->getValueType(0),
6450 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6451 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6452 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6453 return;
6454 case Intrinsic::aarch64_sve_fmin_x2:
6456 Node->getValueType(0),
6457 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6458 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6459 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6460 return;
6461 case Intrinsic::aarch64_sve_smin_x4:
6463 Node->getValueType(0),
6464 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6465 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6466 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6467 return;
6468 case Intrinsic::aarch64_sve_umin_x4:
6470 Node->getValueType(0),
6471 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6472 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6473 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6474 return;
6475 case Intrinsic::aarch64_sve_fmin_x4:
6477 Node->getValueType(0),
6478 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6479 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6480 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6481 return;
6482 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6484 Node->getValueType(0),
6485 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6486 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6487 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6488 return;
6489 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6491 Node->getValueType(0),
6492 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6493 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6494 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6495 return;
6496 case Intrinsic::aarch64_sve_fminnm_single_x2:
6498 Node->getValueType(0),
6499 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6500 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6501 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6502 return;
6503 case Intrinsic::aarch64_sve_fminnm_single_x4:
6505 Node->getValueType(0),
6506 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6507 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6508 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6509 return;
6510 case Intrinsic::aarch64_sve_fscale_single_x4:
6511 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6512 return;
6513 case Intrinsic::aarch64_sve_fscale_single_x2:
6514 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6515 return;
6516 case Intrinsic::aarch64_sve_fmul_single_x4:
6518 Node->getValueType(0),
6519 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6520 AArch64::FMUL_4ZZ_D}))
6521 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6522 return;
6523 case Intrinsic::aarch64_sve_fmul_single_x2:
6525 Node->getValueType(0),
6526 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6527 AArch64::FMUL_2ZZ_D}))
6528 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6529 return;
6530 case Intrinsic::aarch64_sve_fmaxnm_x2:
6532 Node->getValueType(0),
6533 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6534 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6535 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6536 return;
6537 case Intrinsic::aarch64_sve_fmaxnm_x4:
6539 Node->getValueType(0),
6540 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6541 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6542 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6543 return;
6544 case Intrinsic::aarch64_sve_fminnm_x2:
6546 Node->getValueType(0),
6547 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6548 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6549 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6550 return;
6551 case Intrinsic::aarch64_sve_fminnm_x4:
6553 Node->getValueType(0),
6554 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6555 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6556 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6557 return;
6558 case Intrinsic::aarch64_sve_aese_lane_x2:
6559 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6560 return;
6561 case Intrinsic::aarch64_sve_aesd_lane_x2:
6562 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6563 return;
6564 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6565 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6566 return;
6567 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6568 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6569 return;
6570 case Intrinsic::aarch64_sve_aese_lane_x4:
6571 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6572 return;
6573 case Intrinsic::aarch64_sve_aesd_lane_x4:
6574 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6575 return;
6576 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6577 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6578 return;
6579 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6580 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6581 return;
6582 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6583 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6584 return;
// Selected inline rather than through one of the shared Select* helpers: emit
// a single PMULL_2ZZZ_Q machine node that produces one untyped value, then
// redirect each of the intrinsic's two results to a sub-register extract of it.
6585 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6586 SDLoc DL(Node);
// Operands 1 and 2 of the node become the instruction inputs; operand 0 is
// skipped (presumably the intrinsic ID -- confirm against the enclosing case,
// which is outside this excerpt).
6587 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6588 SDNode *Res =
6589 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6590 SDValue SuperReg = SDValue(Res, 0);
// Result I is read back as sub-register (zsub0 + I) with type VT; VT is set
// before this excerpt. The arithmetic relies on zsub0 and the following
// sub-register index being numbered consecutively.
6591 for (unsigned I = 0; I < 2; I++)
6592 ReplaceUses(SDValue(Node, I),
6593 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6594 SuperReg));
// Results 0 and 1 have been redirected to the extracts; remove the original
// intrinsic node.
6595 CurDAG->RemoveDeadNode(Node);
6596 return;
6597 }
6598 case Intrinsic::aarch64_sve_fscale_x4:
6599 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6600 return;
6601 case Intrinsic::aarch64_sve_fscale_x2:
6602 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6603 return;
6604 case Intrinsic::aarch64_sve_fmul_x4:
6606 Node->getValueType(0),
6607 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6608 AArch64::FMUL_4Z4Z_D}))
6609 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6610 return;
6611 case Intrinsic::aarch64_sve_fmul_x2:
6613 Node->getValueType(0),
6614 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6615 AArch64::FMUL_2Z2Z_D}))
6616 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6617 return;
6618 case Intrinsic::aarch64_sve_fcvtzs_x2:
6619 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6620 return;
6621 case Intrinsic::aarch64_sve_scvtf_x2:
6622 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6623 return;
6624 case Intrinsic::aarch64_sve_fcvtzu_x2:
6625 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6626 return;
6627 case Intrinsic::aarch64_sve_ucvtf_x2:
6628 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6629 return;
6630 case Intrinsic::aarch64_sve_fcvtzs_x4:
6631 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6632 return;
6633 case Intrinsic::aarch64_sve_scvtf_x4:
6634 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6635 return;
6636 case Intrinsic::aarch64_sve_fcvtzu_x4:
6637 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6638 return;
6639 case Intrinsic::aarch64_sve_ucvtf_x4:
6640 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6641 return;
6642 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6643 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6644 return;
6645 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6646 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6647 return;
6648 case Intrinsic::aarch64_sve_sclamp_single_x2:
6650 Node->getValueType(0),
6651 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6652 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6653 SelectClamp(Node, 2, Op);
6654 return;
6655 case Intrinsic::aarch64_sve_uclamp_single_x2:
6657 Node->getValueType(0),
6658 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6659 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6660 SelectClamp(Node, 2, Op);
6661 return;
6662 case Intrinsic::aarch64_sve_fclamp_single_x2:
6664 Node->getValueType(0),
6665 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6666 AArch64::FCLAMP_VG2_2Z2Z_D}))
6667 SelectClamp(Node, 2, Op);
6668 return;
6669 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6670 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6671 return;
6672 case Intrinsic::aarch64_sve_sclamp_single_x4:
6674 Node->getValueType(0),
6675 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6676 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6677 SelectClamp(Node, 4, Op);
6678 return;
6679 case Intrinsic::aarch64_sve_uclamp_single_x4:
6681 Node->getValueType(0),
6682 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6683 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6684 SelectClamp(Node, 4, Op);
6685 return;
6686 case Intrinsic::aarch64_sve_fclamp_single_x4:
6688 Node->getValueType(0),
6689 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6690 AArch64::FCLAMP_VG4_4Z4Z_D}))
6691 SelectClamp(Node, 4, Op);
6692 return;
6693 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6694 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6695 return;
6696 case Intrinsic::aarch64_sve_add_single_x2:
6698 Node->getValueType(0),
6699 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6700 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6701 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6702 return;
6703 case Intrinsic::aarch64_sve_add_single_x4:
6705 Node->getValueType(0),
6706 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6707 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6708 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6709 return;
6710 case Intrinsic::aarch64_sve_zip_x2:
6712 Node->getValueType(0),
6713 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6714 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6715 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6716 return;
6717 case Intrinsic::aarch64_sve_zipq_x2:
6718 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6719 AArch64::ZIP_VG2_2ZZZ_Q);
6720 return;
6721 case Intrinsic::aarch64_sve_zip_x4:
6723 Node->getValueType(0),
6724 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6725 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6726 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6727 return;
6728 case Intrinsic::aarch64_sve_zipq_x4:
6729 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6730 AArch64::ZIP_VG4_4Z4Z_Q);
6731 return;
6732 case Intrinsic::aarch64_sve_uzp_x2:
6734 Node->getValueType(0),
6735 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6736 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6737 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6738 return;
6739 case Intrinsic::aarch64_sve_uzpq_x2:
6740 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6741 AArch64::UZP_VG2_2ZZZ_Q);
6742 return;
6743 case Intrinsic::aarch64_sve_uzp_x4:
6745 Node->getValueType(0),
6746 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6747 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6748 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6749 return;
6750 case Intrinsic::aarch64_sve_uzpq_x4:
6751 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6752 AArch64::UZP_VG4_4Z4Z_Q);
6753 return;
6754 case Intrinsic::aarch64_sve_sel_x2:
6756 Node->getValueType(0),
6757 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6758 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6759 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6760 return;
6761 case Intrinsic::aarch64_sve_sel_x4:
6763 Node->getValueType(0),
6764 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6765 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6766 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6767 return;
6768 case Intrinsic::aarch64_sve_frinta_x2:
6769 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6770 return;
6771 case Intrinsic::aarch64_sve_frinta_x4:
6772 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6773 return;
6774 case Intrinsic::aarch64_sve_frintm_x2:
6775 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6776 return;
6777 case Intrinsic::aarch64_sve_frintm_x4:
6778 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6779 return;
6780 case Intrinsic::aarch64_sve_frintn_x2:
6781 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6782 return;
6783 case Intrinsic::aarch64_sve_frintn_x4:
6784 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6785 return;
6786 case Intrinsic::aarch64_sve_frintp_x2:
6787 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6788 return;
6789 case Intrinsic::aarch64_sve_frintp_x4:
6790 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6791 return;
6792 case Intrinsic::aarch64_sve_sunpk_x2:
6794 Node->getValueType(0),
6795 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6796 AArch64::SUNPK_VG2_2ZZ_D}))
6797 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6798 return;
6799 case Intrinsic::aarch64_sve_uunpk_x2:
6801 Node->getValueType(0),
6802 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6803 AArch64::UUNPK_VG2_2ZZ_D}))
6804 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6805 return;
6806 case Intrinsic::aarch64_sve_sunpk_x4:
6808 Node->getValueType(0),
6809 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6810 AArch64::SUNPK_VG4_4Z2Z_D}))
6811 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6812 return;
6813 case Intrinsic::aarch64_sve_uunpk_x4:
6815 Node->getValueType(0),
6816 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6817 AArch64::UUNPK_VG4_4Z2Z_D}))
6818 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6819 return;
6820 case Intrinsic::aarch64_sve_pext_x2: {
6822 Node->getValueType(0),
6823 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6824 AArch64::PEXT_2PCI_D}))
6825 SelectPExtPair(Node, Op);
6826 return;
6827 }
6828 }
6829 break;
6830 }
6831 case ISD::INTRINSIC_VOID: {
6832 unsigned IntNo = Node->getConstantOperandVal(1);
6833 if (Node->getNumOperands() >= 3)
6834 VT = Node->getOperand(2)->getValueType(0);
6835 switch (IntNo) {
6836 default:
6837 break;
6838 case Intrinsic::aarch64_neon_st1x2: {
6839 if (VT == MVT::v8i8) {
6840 SelectStore(Node, 2, AArch64::ST1Twov8b);
6841 return;
6842 } else if (VT == MVT::v16i8) {
6843 SelectStore(Node, 2, AArch64::ST1Twov16b);
6844 return;
6845 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6846 VT == MVT::v4bf16) {
6847 SelectStore(Node, 2, AArch64::ST1Twov4h);
6848 return;
6849 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6850 VT == MVT::v8bf16) {
6851 SelectStore(Node, 2, AArch64::ST1Twov8h);
6852 return;
6853 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6854 SelectStore(Node, 2, AArch64::ST1Twov2s);
6855 return;
6856 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6857 SelectStore(Node, 2, AArch64::ST1Twov4s);
6858 return;
6859 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6860 SelectStore(Node, 2, AArch64::ST1Twov2d);
6861 return;
6862 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6863 SelectStore(Node, 2, AArch64::ST1Twov1d);
6864 return;
6865 }
6866 break;
6867 }
6868 case Intrinsic::aarch64_neon_st1x3: {
6869 if (VT == MVT::v8i8) {
6870 SelectStore(Node, 3, AArch64::ST1Threev8b);
6871 return;
6872 } else if (VT == MVT::v16i8) {
6873 SelectStore(Node, 3, AArch64::ST1Threev16b);
6874 return;
6875 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6876 VT == MVT::v4bf16) {
6877 SelectStore(Node, 3, AArch64::ST1Threev4h);
6878 return;
6879 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6880 VT == MVT::v8bf16) {
6881 SelectStore(Node, 3, AArch64::ST1Threev8h);
6882 return;
6883 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6884 SelectStore(Node, 3, AArch64::ST1Threev2s);
6885 return;
6886 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6887 SelectStore(Node, 3, AArch64::ST1Threev4s);
6888 return;
6889 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6890 SelectStore(Node, 3, AArch64::ST1Threev2d);
6891 return;
6892 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6893 SelectStore(Node, 3, AArch64::ST1Threev1d);
6894 return;
6895 }
6896 break;
6897 }
6898 case Intrinsic::aarch64_neon_st1x4: {
6899 if (VT == MVT::v8i8) {
6900 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6901 return;
6902 } else if (VT == MVT::v16i8) {
6903 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6904 return;
6905 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6906 VT == MVT::v4bf16) {
6907 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6908 return;
6909 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6910 VT == MVT::v8bf16) {
6911 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6912 return;
6913 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6914 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6915 return;
6916 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6917 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6918 return;
6919 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6920 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6921 return;
6922 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6923 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6924 return;
6925 }
6926 break;
6927 }
// NEON st2: pick the two-register store opcode that matches VT and hand it to
// SelectStore. Within this INTRINSIC_VOID case, VT is the value type of
// operand 2 whenever the node has at least three operands (assigned at the top
// of the case). Integer, FP and bfloat element types of the same shape share
// one opcode.
6928 case Intrinsic::aarch64_neon_st2: {
6929 if (VT == MVT::v8i8) {
6930 SelectStore(Node, 2, AArch64::ST2Twov8b);
6931 return;
6932 } else if (VT == MVT::v16i8) {
6933 SelectStore(Node, 2, AArch64::ST2Twov16b);
6934 return;
6935 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6936 VT == MVT::v4bf16) {
6937 SelectStore(Node, 2, AArch64::ST2Twov4h);
6938 return;
6939 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6940 VT == MVT::v8bf16) {
6941 SelectStore(Node, 2, AArch64::ST2Twov8h);
6942 return;
6943 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6944 SelectStore(Node, 2, AArch64::ST2Twov2s);
6945 return;
6946 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6947 SelectStore(Node, 2, AArch64::ST2Twov4s);
6948 return;
6949 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6950 SelectStore(Node, 2, AArch64::ST2Twov2d);
6951 return;
6952 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Single-element 64-bit vectors use the ST1 two-register opcode instead of an
// ST2 one -- presumably because ST2 has no .1d arrangement and interleaving
// one-element vectors equals storing them back to back (confirm against ISA).
6953 SelectStore(Node, 2, AArch64::ST1Twov1d);
6954 return;
6955 }
// No listed type matched: leave the inner switch without selecting here.
6956 break;
6957 }
// NEON st3: pick the three-register store opcode that matches VT and hand it
// to SelectStore. Within this INTRINSIC_VOID case, VT is the value type of
// operand 2 whenever the node has at least three operands (assigned at the top
// of the case). Integer, FP and bfloat element types of the same shape share
// one opcode.
6958 case Intrinsic::aarch64_neon_st3: {
6959 if (VT == MVT::v8i8) {
6960 SelectStore(Node, 3, AArch64::ST3Threev8b);
6961 return;
6962 } else if (VT == MVT::v16i8) {
6963 SelectStore(Node, 3, AArch64::ST3Threev16b);
6964 return;
6965 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6966 VT == MVT::v4bf16) {
6967 SelectStore(Node, 3, AArch64::ST3Threev4h);
6968 return;
6969 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6970 VT == MVT::v8bf16) {
6971 SelectStore(Node, 3, AArch64::ST3Threev8h);
6972 return;
6973 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6974 SelectStore(Node, 3, AArch64::ST3Threev2s);
6975 return;
6976 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6977 SelectStore(Node, 3, AArch64::ST3Threev4s);
6978 return;
6979 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6980 SelectStore(Node, 3, AArch64::ST3Threev2d);
6981 return;
6982 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Single-element 64-bit vectors use the ST1 three-register opcode instead of
// an ST3 one -- presumably because ST3 has no .1d arrangement (confirm
// against the ISA).
6983 SelectStore(Node, 3, AArch64::ST1Threev1d);
6984 return;
6985 }
// No listed type matched: leave the inner switch without selecting here.
6986 break;
6987 }
// NEON st4: pick the four-register store opcode that matches VT and hand it to
// SelectStore. Within this INTRINSIC_VOID case, VT is the value type of
// operand 2 whenever the node has at least three operands (assigned at the top
// of the case). Integer, FP and bfloat element types of the same shape share
// one opcode.
6988 case Intrinsic::aarch64_neon_st4: {
6989 if (VT == MVT::v8i8) {
6990 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6991 return;
6992 } else if (VT == MVT::v16i8) {
6993 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6994 return;
6995 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6996 VT == MVT::v4bf16) {
6997 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6998 return;
6999 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
7000 VT == MVT::v8bf16) {
7001 SelectStore(Node, 4, AArch64::ST4Fourv8h);
7002 return;
7003 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7004 SelectStore(Node, 4, AArch64::ST4Fourv2s);
7005 return;
7006 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7007 SelectStore(Node, 4, AArch64::ST4Fourv4s);
7008 return;
7009 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7010 SelectStore(Node, 4, AArch64::ST4Fourv2d);
7011 return;
7012 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Single-element 64-bit vectors use the ST1 four-register opcode instead of an
// ST4 one -- presumably because ST4 has no .1d arrangement (confirm against
// the ISA).
7013 SelectStore(Node, 4, AArch64::ST1Fourv1d);
7014 return;
7015 }
// No listed type matched: leave the inner switch without selecting here.
7016 break;
7017 }
7018 case Intrinsic::aarch64_neon_st2lane: {
7019 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7020 SelectStoreLane(Node, 2, AArch64::ST2i8);
7021 return;
7022 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7023 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7024 SelectStoreLane(Node, 2, AArch64::ST2i16);
7025 return;
7026 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7027 VT == MVT::v2f32) {
7028 SelectStoreLane(Node, 2, AArch64::ST2i32);
7029 return;
7030 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7031 VT == MVT::v1f64) {
7032 SelectStoreLane(Node, 2, AArch64::ST2i64);
7033 return;
7034 }
7035 break;
7036 }
7037 case Intrinsic::aarch64_neon_st3lane: {
7038 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7039 SelectStoreLane(Node, 3, AArch64::ST3i8);
7040 return;
7041 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7042 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7043 SelectStoreLane(Node, 3, AArch64::ST3i16);
7044 return;
7045 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7046 VT == MVT::v2f32) {
7047 SelectStoreLane(Node, 3, AArch64::ST3i32);
7048 return;
7049 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7050 VT == MVT::v1f64) {
7051 SelectStoreLane(Node, 3, AArch64::ST3i64);
7052 return;
7053 }
7054 break;
7055 }
7056 case Intrinsic::aarch64_neon_st4lane: {
7057 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7058 SelectStoreLane(Node, 4, AArch64::ST4i8);
7059 return;
7060 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7061 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7062 SelectStoreLane(Node, 4, AArch64::ST4i16);
7063 return;
7064 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7065 VT == MVT::v2f32) {
7066 SelectStoreLane(Node, 4, AArch64::ST4i32);
7067 return;
7068 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7069 VT == MVT::v1f64) {
7070 SelectStoreLane(Node, 4, AArch64::ST4i64);
7071 return;
7072 }
7073 break;
7074 }
7075 case Intrinsic::aarch64_sve_st2q: {
7076 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
7077 return;
7078 }
7079 case Intrinsic::aarch64_sve_st3q: {
7080 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
7081 return;
7082 }
7083 case Intrinsic::aarch64_sve_st4q: {
7084 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
7085 return;
7086 }
7087 case Intrinsic::aarch64_sve_st2: {
7088 if (VT == MVT::nxv16i8) {
7089 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
7090 return;
7091 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7092 VT == MVT::nxv8bf16) {
7093 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
7094 return;
7095 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7096 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
7097 return;
7098 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7099 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
7100 return;
7101 }
7102 break;
7103 }
7104 case Intrinsic::aarch64_sve_st3: {
7105 if (VT == MVT::nxv16i8) {
7106 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
7107 return;
7108 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7109 VT == MVT::nxv8bf16) {
7110 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
7111 return;
7112 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7113 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
7114 return;
7115 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7116 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
7117 return;
7118 }
7119 break;
7120 }
7121 case Intrinsic::aarch64_sve_st4: {
7122 if (VT == MVT::nxv16i8) {
7123 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7124 return;
7125 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7126 VT == MVT::nxv8bf16) {
7127 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7128 return;
7129 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7130 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7131 return;
7132 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7133 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7134 return;
7135 }
7136 break;
7137 }
7138 }
7139 break;
7140 }
// LD2post: choose the two-register *_POST load opcode for VT and pass it to
// SelectPostLoad together with a sub-register index. The 64-bit vector types
// are paired with dsub0 and the 128-bit ones with qsub0. VT is set before this
// excerpt (not visible here).
7141 case AArch64ISD::LD2post: {
7142 if (VT == MVT::v8i8) {
7143 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7144 return;
7145 } else if (VT == MVT::v16i8) {
7146 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7147 return;
7148 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7149 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7150 return;
7151 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7152 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7153 return;
7154 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7155 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7156 return;
7157 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7158 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7159 return;
7160 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Single-element 64-bit vectors use the LD1 two-register opcode instead of an
// LD2 one -- presumably because LD2 has no .1d arrangement (confirm against
// the ISA).
7161 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7162 return;
7163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7164 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7165 return;
7166 }
// No listed type matched: leave the switch without selecting here.
7167 break;
7168 }
// LD3post: choose the three-register *_POST load opcode for VT and pass it to
// SelectPostLoad together with a sub-register index. The 64-bit vector types
// are paired with dsub0 and the 128-bit ones with qsub0. VT is set before this
// excerpt (not visible here).
7169 case AArch64ISD::LD3post: {
7170 if (VT == MVT::v8i8) {
7171 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7172 return;
7173 } else if (VT == MVT::v16i8) {
7174 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7175 return;
7176 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7177 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7178 return;
7179 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7180 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7181 return;
7182 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7183 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7184 return;
7185 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7186 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7187 return;
7188 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Single-element 64-bit vectors use the LD1 three-register opcode instead of
// an LD3 one -- presumably because LD3 has no .1d arrangement (confirm
// against the ISA).
7189 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7190 return;
7191 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7192 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7193 return;
7194 }
// No listed type matched: leave the switch without selecting here.
7195 break;
7196 }
// LD4post: choose the four-register *_POST load opcode for VT and pass it to
// SelectPostLoad together with a sub-register index. The 64-bit vector types
// are paired with dsub0 and the 128-bit ones with qsub0. VT is set before this
// excerpt (not visible here).
7197 case AArch64ISD::LD4post: {
7198 if (VT == MVT::v8i8) {
7199 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7200 return;
7201 } else if (VT == MVT::v16i8) {
7202 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7203 return;
7204 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7205 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7206 return;
7207 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7208 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7209 return;
7210 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7211 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7212 return;
7213 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7214 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7215 return;
7216 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Single-element 64-bit vectors use the LD1 four-register opcode instead of an
// LD4 one -- presumably because LD4 has no .1d arrangement (confirm against
// the ISA).
7217 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7218 return;
7219 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7220 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7221 return;
7222 }
// No listed type matched: leave the switch without selecting here.
7223 break;
7224 }
7225 case AArch64ISD::LD1x2post: {
7226 if (VT == MVT::v8i8) {
7227 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7228 return;
7229 } else if (VT == MVT::v16i8) {
7230 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7231 return;
7232 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7233 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7234 return;
7235 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7236 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7237 return;
7238 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7239 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7240 return;
7241 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7242 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7243 return;
7244 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7245 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7246 return;
7247 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7248 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7249 return;
7250 }
7251 break;
7252 }
7253 case AArch64ISD::LD1x3post: {
7254 if (VT == MVT::v8i8) {
7255 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7256 return;
7257 } else if (VT == MVT::v16i8) {
7258 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7259 return;
7260 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7261 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7262 return;
7263 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7264 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7265 return;
7266 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7267 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7268 return;
7269 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7270 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7271 return;
7272 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7273 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7274 return;
7275 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7276 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7277 return;
7278 }
7279 break;
7280 }
7281 case AArch64ISD::LD1x4post: {
7282 if (VT == MVT::v8i8) {
7283 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7284 return;
7285 } else if (VT == MVT::v16i8) {
7286 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7287 return;
7288 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7289 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7290 return;
7291 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7292 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7293 return;
7294 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7295 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7296 return;
7297 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7298 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7299 return;
7300 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7301 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7302 return;
7303 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7304 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7305 return;
7306 }
7307 break;
7308 }
7309 case AArch64ISD::LD1DUPpost: {
7310 if (VT == MVT::v8i8) {
7311 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7312 return;
7313 } else if (VT == MVT::v16i8) {
7314 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7315 return;
7316 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7317 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7318 return;
7319 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7320 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7321 return;
7322 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7323 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7324 return;
7325 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7326 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7327 return;
7328 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7329 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7330 return;
7331 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7332 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7333 return;
7334 }
7335 break;
7336 }
7337 case AArch64ISD::LD2DUPpost: {
7338 if (VT == MVT::v8i8) {
7339 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7340 return;
7341 } else if (VT == MVT::v16i8) {
7342 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7343 return;
7344 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7345 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7346 return;
7347 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7348 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7349 return;
7350 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7351 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7352 return;
7353 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7354 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7355 return;
7356 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7357 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7358 return;
7359 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7360 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7361 return;
7362 }
7363 break;
7364 }
7365 case AArch64ISD::LD3DUPpost: {
7366 if (VT == MVT::v8i8) {
7367 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7368 return;
7369 } else if (VT == MVT::v16i8) {
7370 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7371 return;
7372 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7373 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7374 return;
7375 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7376 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7377 return;
7378 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7379 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7380 return;
7381 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7382 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7383 return;
7384 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7385 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7386 return;
7387 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7388 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7389 return;
7390 }
7391 break;
7392 }
7393 case AArch64ISD::LD4DUPpost: {
7394 if (VT == MVT::v8i8) {
7395 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7396 return;
7397 } else if (VT == MVT::v16i8) {
7398 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7399 return;
7400 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7401 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7402 return;
7403 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7404 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7405 return;
7406 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7407 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7408 return;
7409 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7410 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7411 return;
7412 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7413 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7414 return;
7415 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7416 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7417 return;
7418 }
7419 break;
7420 }
7421 case AArch64ISD::LD1LANEpost: {
7422 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7423 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7424 return;
7425 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7426 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7427 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7428 return;
7429 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7430 VT == MVT::v2f32) {
7431 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7432 return;
7433 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7434 VT == MVT::v1f64) {
7435 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7436 return;
7437 }
7438 break;
7439 }
7440 case AArch64ISD::LD2LANEpost: {
7441 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7442 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7443 return;
7444 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7445 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7446 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7447 return;
7448 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7449 VT == MVT::v2f32) {
7450 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7451 return;
7452 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7453 VT == MVT::v1f64) {
7454 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7455 return;
7456 }
7457 break;
7458 }
7459 case AArch64ISD::LD3LANEpost: {
7460 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7461 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7462 return;
7463 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7464 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7465 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7466 return;
7467 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7468 VT == MVT::v2f32) {
7469 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7470 return;
7471 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7472 VT == MVT::v1f64) {
7473 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7474 return;
7475 }
7476 break;
7477 }
7478 case AArch64ISD::LD4LANEpost: {
7479 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7480 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7481 return;
7482 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7483 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7484 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7485 return;
7486 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7487 VT == MVT::v2f32) {
7488 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7489 return;
7490 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7491 VT == MVT::v1f64) {
7492 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7493 return;
7494 }
7495 break;
7496 }
7497 case AArch64ISD::ST2post: {
7498 VT = Node->getOperand(1).getValueType();
7499 if (VT == MVT::v8i8) {
7500 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7501 return;
7502 } else if (VT == MVT::v16i8) {
7503 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7504 return;
7505 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7506 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7507 return;
7508 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7509 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7510 return;
7511 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7512 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7513 return;
7514 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7515 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7516 return;
7517 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7518 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7519 return;
7520 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7521 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7522 return;
7523 }
7524 break;
7525 }
7526 case AArch64ISD::ST3post: {
7527 VT = Node->getOperand(1).getValueType();
7528 if (VT == MVT::v8i8) {
7529 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7530 return;
7531 } else if (VT == MVT::v16i8) {
7532 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7533 return;
7534 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7535 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7536 return;
7537 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7538 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7539 return;
7540 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7541 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7542 return;
7543 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7544 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7545 return;
7546 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7547 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7548 return;
7549 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7550 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7551 return;
7552 }
7553 break;
7554 }
7555 case AArch64ISD::ST4post: {
7556 VT = Node->getOperand(1).getValueType();
7557 if (VT == MVT::v8i8) {
7558 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7559 return;
7560 } else if (VT == MVT::v16i8) {
7561 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7562 return;
7563 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7564 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7565 return;
7566 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7567 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7568 return;
7569 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7570 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7571 return;
7572 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7573 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7574 return;
7575 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7576 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7577 return;
7578 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7579 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7580 return;
7581 }
7582 break;
7583 }
7584 case AArch64ISD::ST1x2post: {
7585 VT = Node->getOperand(1).getValueType();
7586 if (VT == MVT::v8i8) {
7587 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7588 return;
7589 } else if (VT == MVT::v16i8) {
7590 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7591 return;
7592 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7593 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7594 return;
7595 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7596 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7597 return;
7598 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7599 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7600 return;
7601 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7602 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7603 return;
7604 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7605 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7606 return;
7607 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7608 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7609 return;
7610 }
7611 break;
7612 }
7613 case AArch64ISD::ST1x3post: {
7614 VT = Node->getOperand(1).getValueType();
7615 if (VT == MVT::v8i8) {
7616 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7617 return;
7618 } else if (VT == MVT::v16i8) {
7619 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7620 return;
7621 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7622 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7623 return;
7624 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7625 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7626 return;
7627 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7628 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7629 return;
7630 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7631 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7632 return;
7633 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7634 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7635 return;
7636 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7637 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7638 return;
7639 }
7640 break;
7641 }
7642 case AArch64ISD::ST1x4post: {
7643 VT = Node->getOperand(1).getValueType();
7644 if (VT == MVT::v8i8) {
7645 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7646 return;
7647 } else if (VT == MVT::v16i8) {
7648 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7649 return;
7650 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7651 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7652 return;
7653 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7654 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7655 return;
7656 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7657 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7658 return;
7659 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7660 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7661 return;
7662 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7663 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7664 return;
7665 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7666 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7667 return;
7668 }
7669 break;
7670 }
7671 case AArch64ISD::ST2LANEpost: {
7672 VT = Node->getOperand(1).getValueType();
7673 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7674 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7675 return;
7676 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7677 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7678 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7679 return;
7680 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7681 VT == MVT::v2f32) {
7682 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7683 return;
7684 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7685 VT == MVT::v1f64) {
7686 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7687 return;
7688 }
7689 break;
7690 }
7691 case AArch64ISD::ST3LANEpost: {
7692 VT = Node->getOperand(1).getValueType();
7693 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7694 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7695 return;
7696 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7697 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7698 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7699 return;
7700 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7701 VT == MVT::v2f32) {
7702 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7703 return;
7704 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7705 VT == MVT::v1f64) {
7706 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7707 return;
7708 }
7709 break;
7710 }
7711 case AArch64ISD::ST4LANEpost: {
7712 VT = Node->getOperand(1).getValueType();
7713 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7714 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7715 return;
7716 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7717 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7718 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7719 return;
7720 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7721 VT == MVT::v2f32) {
7722 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7723 return;
7724 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7725 VT == MVT::v1f64) {
7726 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7727 return;
7728 }
7729 break;
7730 }
7731 }
7732
7733 // Select the default instruction
7734 SelectCode(Node);
7735}
7736
7737/// createAArch64ISelDag - This pass converts a legalized DAG into an
7738/// AArch64-specific DAG, ready for instruction scheduling.
///
/// Returns a newly allocated AArch64DAGToDAGISelLegacy built from \p TM and
/// \p OptLevel.
7740 CodeGenOptLevel OptLevel) {
7741 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7742}
7743
7744/// When \p PredVT is a scalable vector predicate in the form
7745/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7746/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7747/// structured vectors (NumVec >1), the output data type is
7748/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7749/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7750/// EVT.
///
/// \p NumVec must be between 1 and 4 inclusive; this is asserted.
7752 unsigned NumVec) {
7753 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7754 if (!PredVT.isScalableVectorOf(MVT::i1))
7755 return EVT();
7756
 // Only the four predicate types nxv16i1, nxv8i1, nxv4i1 and nxv2i1 are
 // accepted; any other scalable i1 vector yields an invalid EVT.
7757 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7758 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7759 return EVT();
7760
 // The element width is SVEBitsPerBlock divided by the predicate's known
 // minimum element count; the element count is then multiplied by NumVec.
7761 ElementCount EC = PredVT.getVectorElementCount();
7762 EVT ScalarVT =
7763 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7764 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7765
7766 return MemVT;
7767}
7768
7769/// Builds an integer vector type large enough to hold \p NumVec instances
7770/// of \p VecVT.
///
/// The element count of the result is the element count of \p VecVT
/// multiplied by \p NumVec.
7771static EVT getMultipleVectorType(LLVMContext &Ctx, EVT VecVT, unsigned NumVec) {
7773 VecVT.getVectorElementCount() * NumVec);
7774}
7775
7776/// Return the EVT of the data associated to a memory operation in \p
7777/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
///
/// The lookup proceeds in order: memory intrinsic nodes, generic
/// load/store nodes (plain and masked), a handful of AArch64ISD opcodes that
/// carry the type as a VTSDNode operand, and finally selected
/// INTRINSIC_VOID / INTRINSIC_W_CHAIN intrinsics.
 // Memory intrinsic nodes record their memory type directly.
7779 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7780 return MemIntr->getMemoryVT();
7781
7782 if (isa<MemSDNode>(Root)) {
7783 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7784
 // Pick up the type of the value being loaded or stored.
7785 EVT DataVT;
7786 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7787 DataVT = Load->getValueType(0);
7788 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7789 DataVT = Load->getValueType(0);
7790 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7791 DataVT = Store->getValue().getValueType();
7792 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7793 DataVT = Store->getValue().getValueType();
7794 else
7795 llvm_unreachable("Unexpected MemSDNode!");
7796
 // Keep the shape of the data type but use the element type that is
 // actually held in memory.
7797 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7798 }
7799
7800 const unsigned Opcode = Root->getOpcode();
7801 // For custom ISD nodes, we have to look at them individually to extract the
7802 // type of the data moved to/from memory.
7803 switch (Opcode) {
7804 case AArch64ISD::LD1_MERGE_ZERO:
7805 case AArch64ISD::LD1S_MERGE_ZERO:
7806 case AArch64ISD::LDNF1_MERGE_ZERO:
7807 case AArch64ISD::LDNF1S_MERGE_ZERO:
 // For these load opcodes the memory type is the VTSDNode at operand 3.
7808 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7809 case AArch64ISD::ST1_PRED:
 // For ST1_PRED the memory type is the VTSDNode at operand 4.
7810 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7811 default:
7812 break;
7813 }
7814
 // Anything that is not an intrinsic node at this point has no known
 // memory type.
7815 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7816 return EVT();
7817
 // Operand 1 holds the intrinsic ID.
7818 switch (Root->getConstantOperandVal(1)) {
7819 default:
7820 return EVT();
7821 case Intrinsic::aarch64_sme_ldr:
7822 case Intrinsic::aarch64_sme_str:
7823 return MVT::nxv16i8;
7824 case Intrinsic::aarch64_sve_prf:
7825 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7826 // width of the predicate.
7828 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7829 case Intrinsic::aarch64_sve_ld2_sret:
7830 case Intrinsic::aarch64_sve_ld2q_sret:
7832 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7833 case Intrinsic::aarch64_sve_st2q:
7835 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7836 case Intrinsic::aarch64_sve_ld3_sret:
7837 case Intrinsic::aarch64_sve_ld3q_sret:
7839 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7840 case Intrinsic::aarch64_sve_st3q:
7842 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7843 case Intrinsic::aarch64_sve_ld4_sret:
7844 case Intrinsic::aarch64_sve_ld4q_sret:
7846 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7847 case Intrinsic::aarch64_sve_st4q:
7849 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
 // Multi-vector loads: the type is derived from the node's first result.
7850 case Intrinsic::aarch64_sve_ld1_pn_x2:
7851 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
7852 return getMultipleVectorType(Ctx, Root->getValueType(0),
7853 /*NumVec=*/2);
7854 case Intrinsic::aarch64_sve_ld1_pn_x4:
7855 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
7856 return getMultipleVectorType(Ctx, Root->getValueType(0),
7857 /*NumVec=*/4);
 // Multi-vector stores: the type is derived from operand 2.
7858 case Intrinsic::aarch64_sve_st1_pn_x2:
7859 case Intrinsic::aarch64_sve_stnt1_pn_x2:
7860 return getMultipleVectorType(Ctx, Root->getOperand(2).getValueType(),
7861 /*NumVec=*/2);
7862 case Intrinsic::aarch64_sve_st1_pn_x4:
7863 case Intrinsic::aarch64_sve_stnt1_pn_x4:
7864 return getMultipleVectorType(Ctx, Root->getOperand(2).getValueType(),
7865 /*NumVec=*/4);
7866 case Intrinsic::aarch64_sve_ld1udq:
7867 case Intrinsic::aarch64_sve_st1dq:
7868 return EVT(MVT::nxv1i64);
7869 case Intrinsic::aarch64_sve_ld1uwq:
7870 case Intrinsic::aarch64_sve_st1wq:
7871 return EVT(MVT::nxv1i32);
7872 }
7873}
7874
7875/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7876/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7877/// where Root is the memory access using N for its address.
///
/// Returns true and fills in \p Base and \p OffImm when the address matches;
/// returns false otherwise.
7878template <int64_t Min, int64_t Max>
7879bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7880 SDValue &Base,
7881 SDValue &OffImm) {
7882 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7883 const DataLayout &DL = CurDAG->getDataLayout();
7884 const MachineFrameInfo &MFI = MF->getFrameInfo();
7885
 // A bare frame index is matched as "frame index + 0".
7886 if (N.getOpcode() == ISD::FrameIndex) {
7887 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7888 // We can only encode VL scaled offsets, so only fold in frame indexes
7889 // referencing SVE objects.
7890 if (MFI.hasScalableStackID(FI)) {
7891 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7892 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7893 return true;
7894 }
7895
7896 return false;
7897 }
7898
 // Without a known memory type the offset cannot be expressed in units of
 // that type.
7899 if (MemVT == EVT())
7900 return false;
7901
7902 if (N.getOpcode() != ISD::ADD)
7903 return false;
7904
 // The second ADD operand must be either a VSCALE node or a plain constant.
7905 SDValue VScale = N.getOperand(1);
7906 int64_t MulImm = std::numeric_limits<int64_t>::max();
7907 if (VScale.getOpcode() == ISD::VSCALE) {
7908 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7909 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7910 int64_t ByteOffset = C->getSExtValue();
7911 const auto KnownVScale =
7913
 // A constant byte offset is only usable when KnownVScale is non-zero and
 // divides it exactly.
7914 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7915 return false;
7916
7917 MulImm = ByteOffset / KnownVScale;
7918 } else
7919 return false;
7920
 // Known-minimum size of the memory type, in bytes.
7921 TypeSize TS = MemVT.getSizeInBits();
7922 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7923
 // The multiplier has to be a whole number of MemVT-sized units.
7924 if ((MulImm % MemWidthBytes) != 0)
7925 return false;
7926
7927 int64_t Offset = MulImm / MemWidthBytes;
7929 return false;
7930
7931 Base = N.getOperand(0);
7932 if (Base.getOpcode() == ISD::FrameIndex) {
7933 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7934 // We can only encode VL scaled offsets, so only fold in frame indexes
7935 // referencing SVE objects.
 // For any other frame index, Base is left as the original ADD operand.
7936 if (MFI.hasScalableStackID(FI))
7937 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7938 }
7939
7940 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7941 return true;
7942}
7943
7944/// Select register plus register addressing mode for SVE, with scaled
7945/// offset.
7946bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7947 SDValue &Base,
7948 SDValue &Offset) {
7949 if (N.getOpcode() != ISD::ADD)
7950 return false;
7951
7952 // Process an ADD node.
7953 const SDValue LHS = N.getOperand(0);
7954 const SDValue RHS = N.getOperand(1);
7955
7956 // 8 bit data does not come with the SHL node, so it is treated
7957 // separately.
7958 if (Scale == 0) {
7959 Base = LHS;
7960 Offset = RHS;
7961 return true;
7962 }
7963
7964 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7965 int64_t ImmOff = C->getSExtValue();
7966 unsigned Size = 1 << Scale;
7967
7968 // To use the reg+reg addressing mode, the immediate must be a multiple of
7969 // the vector element's byte size.
7970 if (ImmOff % Size)
7971 return false;
7972
7973 SDLoc DL(N);
7974 Base = LHS;
7975 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7976 SDValue Ops[] = {Offset};
7977 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7978 Offset = SDValue(MI, 0);
7979 return true;
7980 }
7981
7982 // Check if the RHS is a shift node with a constant.
7983 if (RHS.getOpcode() != ISD::SHL)
7984 return false;
7985
7986 const SDValue ShiftRHS = RHS.getOperand(1);
7987 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7988 if (C->getZExtValue() == Scale) {
7989 Base = LHS;
7990 Offset = RHS.getOperand(0);
7991 return true;
7992 }
7993
7994 return false;
7995}
7996
7997bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7998 const AArch64TargetLowering *TLI =
7999 static_cast<const AArch64TargetLowering *>(getTargetLowering());
8000
8001 return TLI->isAllActivePredicate(*CurDAG, N);
8002}
8003
8004bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
8005 return N.getValueType().isScalableVectorOf(MVT::i1);
8006}
8007
/// Split \p N into a base register and a constant offset for an SME tile
/// slice. A constant offset is only folded when it is positive, no larger
/// than \p MaxSize and a multiple of \p Scale; the folded value is the
/// constant divided by \p Scale. This function always succeeds: when nothing
/// can be folded it matches "N + 0".
8008bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
8010 unsigned Scale) {
 // Returns the scaled offset as an i64 target constant, or an empty SDValue
 // when CN is not a constant in the accepted range.
8011 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
8012 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
8013 int64_t ImmOff = C->getSExtValue();
8014 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
8015 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
8016 }
8017 return SDValue();
8018 };
8019
 // N itself is a foldable constant: use the zero register as the base.
8020 if (SDValue C = MatchConstantOffset(N)) {
8021 Base = getZeroRegister(*CurDAG, SDLoc(N), MVT::i32);
8022 Offset = C;
8023 return true;
8024 }
8025
8026 // Try to untangle an ADD node into a 'reg + offset'
8027 if (CurDAG->isBaseWithConstantOffset(N)) {
8028 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
8029 Base = N.getOperand(0);
8030 Offset = C;
8031 return true;
8032 }
8033 }
8034
8035 // By default, just match reg + 0.
8036 Base = N;
8037 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
8038 return true;
8039}
8040
/// Match \p N as a constant immediate for a compare-and-branch whose
/// condition code is read from operand 1 of \p P. The accepted range is
/// [0, 64) by default, narrowed to [1, 64) for GE/HS/LT/LO and to [0, 63)
/// for LE/LS/GT/HI. On success \p Imm receives the value as a target
/// constant of N's type and the function returns true; otherwise it returns
/// false.
8041bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
8042 SDValue &Imm) {
8044 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
8045 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
8046 // Check conservatively if the immediate fits the valid range [0, 64).
8047 // Immediate variants for GE and HS definitely need to be decremented
8048 // when lowering the pseudos later, so an immediate of 1 would become 0.
8049 // For the inverse conditions LT and LO we don't know for sure if they
8050 // will need a decrement but should the decision be made to reverse the
8051 // branch condition, we again end up with the need to decrement.
8052 // The same argument holds for LE, LS, GT and HI and possibly
8053 // incremented immediates. This can lead to slightly less optimal
8054 // codegen, e.g. we never codegen the legal case
8055 // cblt w0, #63, A
8056 // because we could end up with the illegal case
8057 // cbge w0, #64, B
8058 // should the decision to reverse the branch direction be made. For the
8059 // lower bound cases this is no problem since we can express comparisons
8060 // against 0 with either tbz/tbnz or using wzr/xzr.
8061 uint64_t LowerBound = 0, UpperBound = 64;
8062 switch (CC) {
8063 case AArch64CC::GE:
8064 case AArch64CC::HS:
8065 case AArch64CC::LT:
8066 case AArch64CC::LO:
8067 LowerBound = 1;
8068 break;
8069 case AArch64CC::LE:
8070 case AArch64CC::LS:
8071 case AArch64CC::GT:
8072 case AArch64CC::HI:
8073 UpperBound = 63;
8074 break;
8075 default:
8076 break;
8077 }
8078
 // Accept only LowerBound <= value < UpperBound (unsigned comparison).
8079 if (CN->getAPIntValue().uge(LowerBound) &&
8080 CN->getAPIntValue().ult(UpperBound)) {
8081 SDLoc DL(N);
8082 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
8083 return true;
8084 }
8085 }
8086
8087 return false;
8088}
8089
8090template <bool MatchCBB>
8091bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
8092 SDValue &ExtType) {
8093
8094 // Use an invalid shift-extend value to indicate we don't need to extend later
8095 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
8096 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
8097 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
8098 return false;
8099 Reg = N.getOperand(0);
8100 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
8101 SDLoc(N), MVT::i32);
8102 return true;
8103 }
8104
8106
8107 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
8108 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
8109 Reg = N.getOperand(0);
8110 ExtType =
8111 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
8112 return true;
8113 }
8114
8115 return false;
8116}
8117
8118/// Try to fold AArch64 CSEL/FCMP patterns to FMAXNM/FMINNM.
8119///
8120/// This is intentionally done in PreprocessISelDAG rather than DAGCombine:
8121/// doing this earlier based on the defining operation of X can be invalidated
8122/// by later DAG combines. At this point the DAG is being prepared for
8123/// instruction selection, so the use of isKnownNeverSNaN(X) applies to the
8124/// final SDValue being selected.
8125/// Only handles FCMP(X, C) with scalar FP types, where C is a non-NaN constant.
8126/// The nsz requirement is needed only when C is zero, to avoid signed-zero
8127/// mismatches. The never-sNaN check is required because AArch64 FMAXNM/FMINNM
8128/// differ from fcmp+fcsel for signaling NaN inputs.
8129SDValue AArch64DAGToDAGISel::tryFoldCselToFMaxMin(SDNode &N) {
8130 EVT VT = N.getValueType(0);
8131
8132 // Scalar FP only.
8133 if (!VT.isFloatingPoint() || VT.isVector())
8134 return SDValue();
8135
8136 SDValue TVal = N.getOperand(0);
8137 SDValue FVal = N.getOperand(1);
8138 SDValue CCVal = N.getOperand(2);
8139 SDValue Cmp = N.getOperand(3);
8140
8141 if (Cmp.getOpcode() != AArch64ISD::FCMP)
8142 return SDValue();
8143
8144 auto *CC = dyn_cast<ConstantSDNode>(CCVal);
8145 if (!CC)
8146 return SDValue();
8147
8148 SDValue CmpLHS = Cmp.getOperand(0);
8149 SDValue CmpRHS = Cmp.getOperand(1);
8150 unsigned CondCode = CC->getZExtValue();
8151
8152 // Map VT and operation (max/min) to machine opcode.
8153 auto getOpc = [](EVT VT, bool isMax) -> unsigned {
8154 if (VT == MVT::f16)
8155 return isMax ? AArch64::FMAXNMHrr : AArch64::FMINNMHrr;
8156 else if (VT == MVT::f32)
8157 return isMax ? AArch64::FMAXNMSrr : AArch64::FMINNMSrr;
8158 else if (VT == MVT::f64)
8159 return isMax ? AArch64::FMAXNMDrr : AArch64::FMINNMDrr;
8160 else
8161 return 0; // unsupported
8162 };
8163
8164 // Determine whether to use max or min based on condition code and operands.
8165 bool isMax;
8166 if (CondCode == AArch64CC::GT || CondCode == AArch64CC::GE) {
8167 if (TVal == CmpLHS && FVal == CmpRHS)
8168 isMax = true;
8169 else
8170 return SDValue();
8171 } else if (CondCode == AArch64CC::MI || CondCode == AArch64CC::LS) {
8172 if (TVal == CmpLHS && FVal == CmpRHS)
8173 isMax = false;
8174 else
8175 return SDValue();
8176 } else {
8177 return SDValue();
8178 }
8179
8180 // Get the machine opcode for this VT and operation.
8181 unsigned Opc = getOpc(VT, isMax);
8182 if (!Opc)
8183 return SDValue();
8184
8185 // Constant must be non-NaN.
8186 auto *CFP = dyn_cast<ConstantFPSDNode>(CmpRHS);
8187 if (!CFP || CFP->getValueAPF().isNaN())
8188 return SDValue();
8189
8190 // nsz flag required only when constant is zero: fmaxnm(+0,-0)=+0 differs from
8191 // fcmp+select's -0. For non-zero constants, semantics are identical.
8192 if (CFP->isZero() && !N.getFlags().hasNoSignedZeros())
8193 return SDValue();
8194
8195 // Only fold if variable operand is never sNaN.
8196 // This runs after DAG combines, so later combines cannot remove a defining
8197 // operation used by isKnownNeverSNaN().
8198 if (!CurDAG->isKnownNeverSNaN(CmpLHS))
8199 return SDValue();
8200
8201 SDLoc DL(&N);
8202
8203 // Directly emit the machine node
8204 return SDValue(CurDAG->getMachineNode(Opc, DL, VT, CmpLHS, CmpRHS), 0);
8205}
8206
8207void AArch64DAGToDAGISel::PreprocessISelDAG() {
8208 bool MadeChange = false;
8209 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
8210 if (N.use_empty())
8211 continue;
8212
8214 switch (N.getOpcode()) {
8215 case ISD::SCALAR_TO_VECTOR: {
8216 EVT ScalarTy = N.getValueType(0).getVectorElementType();
8217 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
8218 ScalarTy == N.getOperand(0).getValueType())
8219 Result = addBitcastHints(*CurDAG, N);
8220
8221 break;
8222 }
8223 case AArch64ISD::CSEL:
8224 Result = tryFoldCselToFMaxMin(N);
8225 break;
8226 default:
8227 break;
8228 }
8229
8230 if (Result) {
8231 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
8232 LLVM_DEBUG(N.dump(CurDAG));
8233 LLVM_DEBUG(dbgs() << "\nNew: ");
8234 LLVM_DEBUG(Result.dump(CurDAG));
8235 LLVM_DEBUG(dbgs() << "\n");
8236
8237 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8238 MadeChange = true;
8239 }
8240 }
8241
8242 if (MadeChange)
8243 CurDAG->RemoveDeadNodes();
8244
8246}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static std::optional< APInt > GetNEONSplatValue(SDValue N)
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static SDValue getZeroRegister(SelectionDAG &DAG, SDLoc DL, EVT VT)
Returns a copy from WZR or XZR.
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static EVT getMultipleVectorType(LLVMContext &Ctx, EVT VecVT, unsigned NumVec)
Builds an integer vector type large enough to hold NumVec instances of VecVT.
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
Value * RHS
Value * LHS
AArch64DAGToDAGISelPass(AArch64TargetMachine &TM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(const SelectionDAG &DAG, SDValue N) const
Register matchRegisterName(StringRef RegName) const
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1475
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:993
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
constexpr bool isLegalArithImmed(const uint64_t C)
isLegalArithImmed -
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
constexpr unsigned getArithImmedShift(const uint64_t C)
getArithImmedShift - assumes C is a legal immediate for arithmetic instructions and
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isScalableVectorOf(EVT EltVT) const
Return true if this is a scalable vector with matching element type.
Definition ValueTypes.h:192
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type, which is chosen by the caller.
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.