LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64.h"
17#include "llvm/ADT/APSInt.h"
20#include "llvm/IR/Function.h" // To access function attributes.
21#include "llvm/IR/GlobalValue.h"
22#include "llvm/IR/Intrinsics.h"
23#include "llvm/IR/IntrinsicsAArch64.h"
24#include "llvm/Support/Debug.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "aarch64-isel"
33#define PASS_NAME "AArch64 Instruction Selection"
34
35// https://github.com/llvm/llvm-project/issues/114425
36#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
37#pragma inline_depth(0)
38#endif
39
40//===--------------------------------------------------------------------===//
41/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
42/// instructions for SelectionDAG operations.
43///
44namespace {
45
46class AArch64DAGToDAGISel : public SelectionDAGISel {
47
48 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
49 /// make the right decision when generating code for different targets.
50 const AArch64Subtarget *Subtarget;
51
52public:
53 AArch64DAGToDAGISel() = delete;
54
55 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
56 CodeGenOptLevel OptLevel)
57 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
58
59 bool runOnMachineFunction(MachineFunction &MF) override {
60 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
62 }
63
64 void Select(SDNode *Node) override;
65 void PreprocessISelDAG() override;
66
67 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
68 /// inline asm expressions.
69 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
70 InlineAsm::ConstraintCode ConstraintID,
71 std::vector<SDValue> &OutOps) override;
72
73 template <signed Low, signed High, signed Scale>
74 bool SelectRDVLImm(SDValue N, SDValue &Imm);
75
76 template <signed Low, signed High>
77 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
78
79 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
81 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
83 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
84 return SelectShiftedRegister(N, false, Reg, Shift);
85 }
86 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
87 return SelectShiftedRegister(N, true, Reg, Shift);
88 }
89 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
90 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
91 }
92 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
93 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
94 }
95 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
96 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
97 }
98 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
99 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
100 }
101 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
103 }
104 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
105 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
106 }
107 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
108 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
109 }
110 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
111 return SelectAddrModeIndexed(N, 1, Base, OffImm);
112 }
113 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
114 return SelectAddrModeIndexed(N, 2, Base, OffImm);
115 }
116 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
117 return SelectAddrModeIndexed(N, 4, Base, OffImm);
118 }
119 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
120 return SelectAddrModeIndexed(N, 8, Base, OffImm);
121 }
122 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
123 return SelectAddrModeIndexed(N, 16, Base, OffImm);
124 }
125 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
126 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
127 }
128 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
129 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
130 }
131 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
132 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
133 }
134 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
135 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
136 }
137 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
138 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
139 }
140 template <unsigned Size, unsigned Max>
141 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
142 // Test if there is an appropriate addressing mode and check if the
143 // immediate fits.
144 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
145 if (Found) {
146 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
147 int64_t C = CI->getSExtValue();
148 if (C <= Max)
149 return true;
150 }
151 }
152
153 // Otherwise, base only, materialize address in register.
154 Base = N;
155 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
156 return true;
157 }
158
159 template<int Width>
160 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
161 SDValue &SignExtend, SDValue &DoShift) {
162 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163 }
164
165 template<int Width>
166 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
167 SDValue &SignExtend, SDValue &DoShift) {
168 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
169 }
170
171 bool SelectExtractHigh(SDValue N, SDValue &Res) {
172 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
173 N = N->getOperand(0);
174 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
175 !isa<ConstantSDNode>(N->getOperand(1)))
176 return false;
177 EVT VT = N->getValueType(0);
178 EVT LVT = N->getOperand(0).getValueType();
179 unsigned Index = N->getConstantOperandVal(1);
180 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
181 Index != VT.getVectorNumElements())
182 return false;
183 Res = N->getOperand(0);
184 return true;
185 }
186
187 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
188 if (N.getOpcode() != AArch64ISD::VLSHR)
189 return false;
190 SDValue Op = N->getOperand(0);
191 EVT VT = Op.getValueType();
192 unsigned ShtAmt = N->getConstantOperandVal(1);
193 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
194 return false;
195
196 APInt Imm;
197 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0)
200 << Op.getOperand(1).getConstantOperandVal(1));
201 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
202 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
203 Imm = APInt(VT.getScalarSizeInBits(),
204 Op.getOperand(1).getConstantOperandVal(0));
205 else
206 return false;
207
208 if (Imm != 1ULL << (ShtAmt - 1))
209 return false;
210
211 Res1 = Op.getOperand(0);
212 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
213 return true;
214 }
215
216 bool SelectDupZeroOrUndef(SDValue N) {
217 switch(N->getOpcode()) {
218 case ISD::UNDEF:
219 return true;
220 case AArch64ISD::DUP:
221 case ISD::SPLAT_VECTOR: {
222 auto Opnd0 = N->getOperand(0);
223 if (isNullConstant(Opnd0))
224 return true;
225 if (isNullFPConstant(Opnd0))
226 return true;
227 break;
228 }
229 default:
230 break;
231 }
232
233 return false;
234 }
235
236 bool SelectAny(SDValue) { return true; }
237
238 bool SelectDupZero(SDValue N) {
239 switch(N->getOpcode()) {
240 case AArch64ISD::DUP:
241 case ISD::SPLAT_VECTOR: {
242 auto Opnd0 = N->getOperand(0);
243 if (isNullConstant(Opnd0))
244 return true;
245 if (isNullFPConstant(Opnd0))
246 return true;
247 break;
248 }
249 }
250
251 return false;
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348 /// between 1 and 4 elements. If it contains a single element that is returned
349 /// unchanged; otherwise a REG_SEQUENCE value is returned.
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 void SelectPtrauthAuth(SDNode *N);
370 void SelectPtrauthResign(SDNode *N);
371
372 bool trySelectStackSlotTagP(SDNode *N);
373 void SelectTagP(SDNode *N);
374
375 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
378 unsigned SubRegIdx);
379 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
382 unsigned Opc_rr, unsigned Opc_ri,
383 bool IsIntr = false);
384 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
385 unsigned Scale, unsigned Opc_ri,
386 unsigned Opc_rr);
387 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
388 bool IsZmMulti, unsigned Opcode,
389 bool HasPred = false);
390 void SelectPExtPair(SDNode *N, unsigned Opc);
391 void SelectWhilePair(SDNode *N, unsigned Opc);
392 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
395 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
396 bool IsTupleInput, unsigned Opc);
397 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
398
399 template <unsigned MaxIdx, unsigned Scale>
400 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
401 unsigned Op);
402 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
403 unsigned Op, unsigned MaxIdx, unsigned Scale,
404 unsigned BaseReg = 0);
405 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
406 /// SVE Reg+Imm addressing mode.
407 template <int64_t Min, int64_t Max>
408 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
409 SDValue &OffImm);
410 /// SVE Reg+Reg address mode.
411 template <unsigned Scale>
412 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
413 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
414 }
415
416 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
417 unsigned Opc, uint32_t MaxImm);
418
419 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
420
421 template <unsigned MaxIdx, unsigned Scale>
422 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
423 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
424 }
425
426 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
430 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
431 unsigned Opc_rr, unsigned Opc_ri);
432 std::tuple<unsigned, SDValue, SDValue>
433 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
434 const SDValue &OldBase, const SDValue &OldOffset,
435 unsigned Scale);
436
437 bool tryBitfieldExtractOp(SDNode *N);
438 bool tryBitfieldExtractOpFromSExt(SDNode *N);
439 bool tryBitfieldInsertOp(SDNode *N);
440 bool tryBitfieldInsertInZeroOp(SDNode *N);
441 bool tryShiftAmountMod(SDNode *N);
442
443 bool tryReadRegister(SDNode *N);
444 bool tryWriteRegister(SDNode *N);
445
446 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
447 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
448
449 bool trySelectXAR(SDNode *N);
450
451 SDValue tryFoldCselToFMaxMin(SDNode &N);
452
453// Include the pieces autogenerated from the target description.
454#include "AArch64GenDAGISel.inc"
455
456private:
457 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
458 SDValue &Shift);
459 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
460 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm) {
462 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
463 }
464 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
465 unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &OffImm);
469 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
470 SDValue &OffImm);
471 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
472 SDValue &Offset, SDValue &SignExtend,
473 SDValue &DoShift);
474 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
475 SDValue &Offset, SDValue &SignExtend,
476 SDValue &DoShift);
477 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
478 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
479 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
480 SDValue &Offset, SDValue &SignExtend);
481
482 template<unsigned RegWidth>
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
484 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
485 }
486 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
487
488 template <unsigned RegWidth>
489 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
490 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
491 }
492 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
493
494 template<unsigned RegWidth>
495 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
496 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
497 }
498
499 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
500 unsigned Width);
501
502 template <unsigned FloatWidth>
503 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
504 return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
505 }
506
507 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
508 unsigned Width);
509
510 bool SelectCMP_SWAP(SDNode *N);
511
512 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
513 bool Negate);
514 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
515 SDValue &Shift, bool Negate);
516 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
517 bool Negate);
518 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
519 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
520
521 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
522 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
523 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
524 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
525
526 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
527 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
528 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
529 bool AllowSaturation, SDValue &Imm);
530
531 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
532 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
533 SDValue &Offset);
534 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
535 SDValue &Offset, unsigned Scale = 1);
536
537 bool SelectAllActivePredicate(SDValue N);
538 bool SelectAnyPredicate(SDValue N);
539
540 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
541
542 template <bool MatchCBB>
543 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
544};
545
546class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
547public:
548 static char ID;
549 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
550 CodeGenOptLevel OptLevel)
552 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
553};
554} // end anonymous namespace
555
556char AArch64DAGToDAGISelLegacy::ID = 0;
557
558INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
559
562 std::make_unique<AArch64DAGToDAGISel>(TM, TM.getOptLevel())) {}
563
564/// addBitcastHints - This method adds bitcast hints to the operands of a node
565/// to help instruction selector determine which operands are in Neon registers.
567 SDLoc DL(&N);
568 auto getFloatVT = [&](EVT VT) {
569 EVT ScalarVT = VT.getScalarType();
570 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
571 return VT.changeElementType(*(DAG.getContext()),
572 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
573 };
575 NewOps.reserve(N.getNumOperands());
576
577 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
578 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
579 N.getOperand(I));
580 NewOps.push_back(bitcasted);
581 }
582 EVT OrigVT = N.getValueType(0);
583 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
584 return DAG.getBitcast(OrigVT, OpNode);
585}
586
587/// isIntImmediate - This method tests to see if the node is a constant
588/// operand. If so Imm will receive the 64-bit value.
589static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
591 Imm = C->getZExtValue();
592 return true;
593 }
594 return false;
595}
596
597// isIntImmediate - This method tests to see if a constant operand.
598// If so Imm will receive the value.
599static bool isIntImmediate(SDValue N, uint64_t &Imm) {
600 return isIntImmediate(N.getNode(), Imm);
601}
602
603// isOpcWithIntImmediate - This method tests to see if the node is a specific
604// opcode and that it has a immediate integer right operand.
605// If so Imm will receive the 32 bit value.
606static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
607 uint64_t &Imm) {
608 return N->getOpcode() == Opc &&
609 isIntImmediate(N->getOperand(1).getNode(), Imm);
610}
611
612// isIntImmediateEq - This method tests to see if N is a constant operand that
613// is equivalent to 'ImmExpected'.
614#ifndef NDEBUG
615static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
616 uint64_t Imm;
617 if (!isIntImmediate(N.getNode(), Imm))
618 return false;
619 return Imm == ImmExpected;
620}
621#endif
622
623static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
624 assert(RegWidth == 32 || RegWidth == 64);
625 if (RegWidth == 32)
626 return APInt(RegWidth,
628 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
629}
630
631// Decodes the raw integer splat value from a NEON splat operation.
632static std::optional<APInt> DecodeNEONSplat(SDValue N) {
633 assert(N.getValueType().isInteger() && "Only integers are supported");
634 if (N->getOpcode() == AArch64ISD::NVCAST)
635 N = N->getOperand(0);
636 unsigned SplatWidth = N.getScalarValueSizeInBits();
637 if (N.getOpcode() == AArch64ISD::FMOV)
638 return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
639 if (N->getOpcode() == AArch64ISD::MOVI)
640 return APInt(SplatWidth, N.getConstantOperandVal(0));
641 if (N->getOpcode() == AArch64ISD::MOVIshift)
642 return APInt(SplatWidth, N.getConstantOperandVal(0)
643 << N.getConstantOperandVal(1));
644 if (N->getOpcode() == AArch64ISD::MVNIshift)
645 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
646 << N.getConstantOperandVal(1));
647 if (N->getOpcode() == AArch64ISD::MOVIedit)
649 N.getConstantOperandVal(0)));
650 if (N->getOpcode() == AArch64ISD::DUP)
651 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
652 return Const->getAPIntValue().trunc(SplatWidth);
653 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
654 // in AArch64ISelLowering.
655 return std::nullopt;
656}
657
658// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
659// matching the element size of N.
660static std::optional<APInt> GetNEONSplatValue(SDValue N) {
661 unsigned SplatWidth = N.getScalarValueSizeInBits();
662 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
663 if (SplatVal->getBitWidth() <= SplatWidth)
664 return APInt::getSplat(SplatWidth, *SplatVal);
665 if (SplatVal->isSplat(SplatWidth))
666 return SplatVal->trunc(SplatWidth);
667 }
668 return std::nullopt;
669}
670
671bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
672 SDValue &Imm) {
673 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
674 if (!ImmVal)
675 return false;
676 uint64_t Encoding;
677 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
678 ImmVal->getZExtValue(), Encoding))
679 return false;
680
681 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
682 return true;
683}
684
685bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
686 SDValue &Shift) {
687 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
688 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
689 N.getValueType().getScalarType().getSimpleVT(),
690 Imm, Shift,
691 /*Negate=*/false);
692 return false;
693}
694
695bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
696 SDValue &Imm) {
697 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
698 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
699 return false;
700}
701
702bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
703 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
704 std::vector<SDValue> &OutOps) {
705 switch(ConstraintID) {
706 default:
707 llvm_unreachable("Unexpected asm memory constraint");
708 case InlineAsm::ConstraintCode::m:
709 case InlineAsm::ConstraintCode::o:
710 case InlineAsm::ConstraintCode::Q:
711 // We need to make sure that this one operand does not end up in XZR, thus
712 // require the address to be in a PointerRegClass register.
713 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
714 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
715 SDLoc dl(Op);
716 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
717 SDValue NewOp =
718 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
719 dl, Op.getValueType(),
720 Op, RC), 0);
721 OutOps.push_back(NewOp);
722 return false;
723 }
724 return true;
725}
726
727/// SelectArithImmed - Select an immediate value that can be represented as
728/// a 12-bit value shifted left by either 0 or 12. If so, return true with
729/// Val set to the 12-bit value and Shift set to the shifter operand.
730bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
731 SDValue &Shift) {
732 // This function is called from the addsub_shifted_imm ComplexPattern,
733 // which lists [imm] as the list of opcode it's interested in, however
734 // we still need to check whether the operand is actually an immediate
735 // here because the ComplexPattern opcode list is only used in
736 // root-level opcode matching.
737 if (!isa<ConstantSDNode>(N.getNode()))
738 return false;
739
740 uint64_t Immed = N.getNode()->getAsZExtVal();
741
743 return false;
744
745 unsigned ShiftAmt = AArch64_AM::getArithImmedShift(Immed);
746 Immed >>= ShiftAmt;
747
748 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
749 SDLoc dl(N);
750 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
751 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
752 return true;
753}
754
755/// SelectNegArithImmed - As above, but negates the value before trying to
756/// select it.
757bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
758 SDValue &Shift) {
759 // This function is called from the addsub_shifted_imm ComplexPattern,
760 // which lists [imm] as the list of opcode it's interested in, however
761 // we still need to check whether the operand is actually an immediate
762 // here because the ComplexPattern opcode list is only used in
763 // root-level opcode matching.
764 if (!isa<ConstantSDNode>(N.getNode()))
765 return false;
766
767 // The immediate operand must be a 24-bit zero-extended immediate.
768 uint64_t Immed = N.getNode()->getAsZExtVal();
769
770 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
771 // have the opposite effect on the C flag, so this pattern mustn't match under
772 // those circumstances.
773 if (Immed == 0)
774 return false;
775
776 if (N.getValueType() == MVT::i32)
777 Immed = ~((uint32_t)Immed) + 1;
778 else
779 Immed = ~Immed + 1ULL;
780 if (Immed & 0xFFFFFFFFFF000000ULL)
781 return false;
782
783 Immed &= 0xFFFFFFULL;
784 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
785 Shift);
786}
787
788/// getShiftTypeForNode - Translate a shift node to the corresponding
789/// ShiftType value.
791 switch (N.getOpcode()) {
792 default:
794 case ISD::SHL:
795 return AArch64_AM::LSL;
796 case ISD::SRL:
797 return AArch64_AM::LSR;
798 case ISD::SRA:
799 return AArch64_AM::ASR;
800 case ISD::ROTR:
801 return AArch64_AM::ROR;
802 }
803}
804
806 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
807}
808
809/// Determine whether it is worth it to fold SHL into the addressing
810/// mode.
812 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
813 // It is worth folding logical shift of up to three places.
814 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
815 if (!CSD)
816 return false;
817 unsigned ShiftVal = CSD->getZExtValue();
818 if (ShiftVal > 3)
819 return false;
820
821 // Check if this particular node is reused in any non-memory related
822 // operation. If yes, do not try to fold this node into the address
823 // computation, since the computation will be kept.
824 const SDNode *Node = V.getNode();
825 for (SDNode *UI : Node->users())
826 if (!isMemOpOrPrefetch(UI))
827 for (SDNode *UII : UI->users())
828 if (!isMemOpOrPrefetch(UII))
829 return false;
830 return true;
831}
832
833/// Determine whether it is worth to fold V into an extended register addressing
834/// mode.
835bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
836 // Trivial if we are optimizing for code size or if there is only
837 // one use of the value.
838 if (CurDAG->shouldOptForSize() || V.hasOneUse())
839 return true;
840
841 // If a subtarget has a slow shift, folding a shift into multiple loads
842 // costs additional micro-ops.
843 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
844 return false;
845
846 // Check whether we're going to emit the address arithmetic anyway because
847 // it's used by a non-address operation.
848 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
849 return true;
850 if (V.getOpcode() == ISD::ADD) {
851 const SDValue LHS = V.getOperand(0);
852 const SDValue RHS = V.getOperand(1);
853 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
854 return true;
855 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
856 return true;
857 }
858
859 // It hurts otherwise, since the value will be reused.
860 return false;
861}
862
863/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
864/// to select more shifted register
865bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
866 SDValue &Shift) {
867 EVT VT = N.getValueType();
868 if (VT != MVT::i32 && VT != MVT::i64)
869 return false;
870
871 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
872 return false;
873 SDValue LHS = N.getOperand(0);
874 if (!LHS->hasOneUse())
875 return false;
876
877 unsigned LHSOpcode = LHS->getOpcode();
878 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
879 return false;
880
881 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
882 if (!ShiftAmtNode)
883 return false;
884
885 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
886 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
887 if (!RHSC)
888 return false;
889
890 APInt AndMask = RHSC->getAPIntValue();
891 unsigned LowZBits, MaskLen;
892 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
893 return false;
894
895 unsigned BitWidth = N.getValueSizeInBits();
896 SDLoc DL(LHS);
897 uint64_t NewShiftC;
898 unsigned NewShiftOp;
899 if (LHSOpcode == ISD::SHL) {
900 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
901 // BitWidth != LowZBits + MaskLen doesn't match the pattern
902 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
903 return false;
904
905 NewShiftC = LowZBits - ShiftAmtC;
906 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
907 } else {
908 if (LowZBits == 0)
909 return false;
910
911 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
912 NewShiftC = LowZBits + ShiftAmtC;
913 if (NewShiftC >= BitWidth)
914 return false;
915
916 // SRA need all high bits
917 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
918 return false;
919
920 // SRL high bits can be 0 or 1
921 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
922 return false;
923
924 if (LHSOpcode == ISD::SRL)
925 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
926 else
927 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
928 }
929
930 assert(NewShiftC < BitWidth && "Invalid shift amount");
931 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
932 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
933 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
934 NewShiftAmt, BitWidthMinus1),
935 0);
936 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
937 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
938 return true;
939}
940
941/// getExtendTypeForNode - Translate an extend node to the corresponding
942/// ExtendType value.
///
/// Three node shapes are recognized: sign extension (SIGN_EXTEND or
/// SIGN_EXTEND_INREG), zero/any extension (ZERO_EXTEND or ANY_EXTEND), and an
/// AND with a constant mask of 0xFF, 0xFFFF or 0xFFFFFFFF.
///
/// \param N           The node to classify.
/// \param IsLoadStore When true, the byte and halfword extend kinds are never
///                    returned; only the 32-bit (SXTW/UXTW) kinds are.
944getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
945 if (N.getOpcode() == ISD::SIGN_EXTEND ||
946 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
  // SIGN_EXTEND_INREG carries the source type as a VTSDNode operand, whereas
  // SIGN_EXTEND takes it from the type of the value being extended.
947 EVT SrcVT;
948 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
949 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
950 else
951 SrcVT = N.getOperand(0).getValueType();
952
953 if (!IsLoadStore && SrcVT == MVT::i8)
954 return AArch64_AM::SXTB;
955 else if (!IsLoadStore && SrcVT == MVT::i16)
956 return AArch64_AM::SXTH;
957 else if (SrcVT == MVT::i32)
958 return AArch64_AM::SXTW;
959 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
960
962 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
963 N.getOpcode() == ISD::ANY_EXTEND) {
  // ANY_EXTEND is classified the same way as ZERO_EXTEND here.
964 EVT SrcVT = N.getOperand(0).getValueType();
965 if (!IsLoadStore && SrcVT == MVT::i8)
966 return AArch64_AM::UXTB;
967 else if (!IsLoadStore && SrcVT == MVT::i16)
968 return AArch64_AM::UXTH;
969 else if (SrcVT == MVT::i32)
970 return AArch64_AM::UXTW;
971 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
972
974 } else if (N.getOpcode() == ISD::AND) {
  // An AND with a low-bits mask is classified as a zero extension.
975 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
976 if (!CSD)
978 uint64_t AndMask = CSD->getZExtValue();
979
980 switch (AndMask) {
981 default:
983 case 0xFF:
984 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
985 case 0xFFFF:
986 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
987 case 0xFFFFFFFF:
988 return AArch64_AM::UXTW;
989 }
990 }
991
993}
994
995/// Determine whether it is worth to fold V into an extended register of an
996/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
997/// instruction, and the shift should be treated as worth folding even if it has
998/// multiple uses.
///
/// \param V   The value that would be folded into the ALU operand.
/// \param LSL True when the fold is a plain logical left shift. Other code in
///            this file calls the function with a single argument, so a
///            default is presumably declared with the class — TODO confirm.
/// \returns true when folding is considered profitable.
999bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
1000 // Trivial if we are optimizing for code size or if there is only
1001 // one use of the value.
1002 if (CurDAG->shouldOptForSize() || V.hasOneUse())
1003 return true;
1004
1005 // If a subtarget has a fastpath LSL we can fold a logical shift into
1006 // the add/sub and save a cycle.
  // Only SHL nodes with a constant shift amount of at most 4 are considered.
1007 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
1008 V.getConstantOperandVal(1) <= 4 &&
1010 return true;
1011
1012 // It hurts otherwise, since the value will be reused.
1013 return false;
1014}
1015
1016/// SelectShiftedRegister - Select a "shifted register" operand. If the value
1017/// is not shifted, set the Shift operand to default of "LSL 0". The logical
1018/// instructions allow the shifted register to be rotated, but the arithmetic
1019/// instructions do not. The AllowROR parameter specifies whether ROR is
1020/// supported.
///
/// \param N        The candidate operand.
/// \param AllowROR Whether a rotate-right shift kind may be matched.
/// \param Reg      Receives the register being shifted.
/// \param Shift    Receives the encoded shifter immediate as an i32 target
///                 constant.
/// \returns true when the operand was matched and folding is worthwhile.
///
/// NOTE(review): the visible body returns false for an InvalidShiftExtend
/// kind instead of producing an "LSL 0" default; the unshifted case described
/// above is presumably handled elsewhere — confirm.
1021bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1022 SDValue &Reg, SDValue &Shift) {
  // First try the and-of-shift rewrite, which builds its own shift node.
1023 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1024 return true;
1025
1027 if (ShType == AArch64_AM::InvalidShiftExtend)
1028 return false;
1029 if (!AllowROR && ShType == AArch64_AM::ROR)
1030 return false;
1031
  // Only constant shift amounts are matched.
1032 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1033 unsigned BitSize = N.getValueSizeInBits();
  // The shift amount is masked with (BitSize - 1) before being encoded.
1034 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1035 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1036
  // Reg and Shift are written before the profitability check below, so they
  // may be modified even when the function returns false.
1037 Reg = N.getOperand(0);
1038 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
1039 return isWorthFoldingALU(N, true);
1040 }
1041
1042 return false;
1043}
1044
1045/// Instructions that accept extend modifiers like UXTW expect the register
1046/// being extended to be a GPR32, but the incoming DAG might be acting on a
1047/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1048/// this is the case.
///
/// Returns \p N unchanged when it is already an i32 value; otherwise returns
/// an i32 extract of the sub_32 subregister of \p N.
1050 if (N.getValueType() == MVT::i32)
1051 return N;
1052
  // Any other type is narrowed by taking its sub_32 subregister.
1053 SDLoc dl(N);
1054 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1055}
1056
1057// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1058template<signed Low, signed High, signed Scale>
1059bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1060 if (!isa<ConstantSDNode>(N))
1061 return false;
1062
1063 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1064 if ((MulImm % std::abs(Scale)) == 0) {
1065 int64_t RDVLImm = MulImm / Scale;
1066 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1067 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1068 return true;
1069 }
1070 }
1071
1072 return false;
1073}
1074
1075// Returns a suitable RDSVL multiplier from a left shift.
1076template <signed Low, signed High>
1077bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1078 if (!isa<ConstantSDNode>(N))
1079 return false;
1080
1081 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1082 if (MulImm >= Low && MulImm <= High) {
1083 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1084 return true;
1085 }
1086
1087 return false;
1088}
1089
1090/// SelectArithExtendedRegister - Select an "extended register" operand. This
1091/// operand folds in an extend followed by an optional left shift.
///
/// \param N     The candidate operand: either an extend-like node, or a SHL
///              by a constant of at most 4 whose operand is an extend-like
///              node.
/// \param Reg   Receives the (possibly narrowed) register being extended.
/// \param Shift Receives the encoded arithmetic-extend immediate as an i32
///              target constant.
/// \returns true when the operand was matched and folding is worthwhile.
1092bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
1093 SDValue &Shift) {
1094 unsigned ShiftVal = 0;
1096
1097 if (N.getOpcode() == ISD::SHL) {
  // Shifted form: the shift amount must be a constant no larger than 4.
1098 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1099 if (!CSD)
1100 return false;
1101 ShiftVal = CSD->getZExtValue();
1102 if (ShiftVal > 4)
1103 return false;
1104
  // The extend kind is taken from the value being shifted.
1105 Ext = getExtendTypeForNode(N.getOperand(0));
1107 return false;
1108
1109 Reg = N.getOperand(0).getOperand(0);
1110 } else {
  // Unshifted form: N itself has to be the extend.
1111 Ext = getExtendTypeForNode(N);
1113 return false;
1114
1115 // Don't match sext of vector extracts. These can use SMOV, but if we match
1116 // this as an extended register, we'll always fold the extend into an ALU op
1117 // user of the extend (which results in a UMOV).
1119 SDValue Op = N.getOperand(0);
  // Look through an ANY_EXTEND before checking for the vector extract.
1120 if (Op->getOpcode() == ISD::ANY_EXTEND)
1121 Op = Op->getOperand(0);
1122 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1123 Op.getOperand(0).getValueType().isFixedLengthVector())
1124 return false;
1125 }
1126
1127 Reg = N.getOperand(0);
1128
1129 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1130 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1131 auto isDef32 = [](SDValue N) {
1132 unsigned Opc = N.getOpcode();
1133 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1136 Opc != ISD::FREEZE;
1137 };
1138 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1139 isDef32(Reg))
1140 return false;
1141 }
1142
1143 // AArch64 mandates that the RHS of the operation must use the smallest
1144 // register class that could contain the size being extended from. Thus,
1145 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1146 // there might not be an actual 32-bit value in the program. We can
1147 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1148 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1149 Reg = narrowIfNeeded(CurDAG, Reg);
1150 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1151 MVT::i32);
  // Reg and Shift are already written at this point, so they may be modified
  // even when the function returns false.
1152 return isWorthFoldingALU(N);
1153}
1154
1155/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1156/// operand form is used by the instructions that have an SP operand.
///
/// Only a SHL by a constant of at most 4 is matched.
///
/// \param N     The candidate operand.
/// \param Reg   Receives the value being shifted.
/// \param Shift Receives the encoded UXTX extend immediate (including the
///              shift amount) as an i32 target constant.
/// \returns true when the operand was matched and folding is worthwhile.
1157bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1158 SDValue &Shift) {
1159 unsigned ShiftVal = 0;
1161
1162 if (N.getOpcode() != ISD::SHL)
1163 return false;
1164
1165 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1166 if (!CSD)
1167 return false;
1168 ShiftVal = CSD->getZExtValue();
1169 if (ShiftVal > 4)
1170 return false;
1171
  // The extend kind is always UXTX here; only the shift amount varies.
1172 Ext = AArch64_AM::UXTX;
1173 Reg = N.getOperand(0);
1174 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1175 MVT::i32);
1176 return isWorthFoldingALU(N);
1177}
1178
1179/// If there's a use of this ADDlow that's not itself a load/store then we'll
1180/// need to create a real ADD instruction from it anyway and there's no point in
1181/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1182/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1183/// leads to duplicated ADRP instructions.
///
/// Returns true only when every user of \p N is a LOAD, STORE, ATOMIC_LOAD or
/// ATOMIC_STORE whose success ordering is not stronger than monotonic.
1185 for (auto *User : N->users()) {
1186 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1187 User->getOpcode() != ISD::ATOMIC_LOAD &&
1188 User->getOpcode() != ISD::ATOMIC_STORE)
1189 return false;
1190
1191 // ldar and stlr have much more restrictive addressing modes (just a
1192 // register).
1193 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1194 return false;
1195 }
1196
1197 return true;
1198}
1199
1200/// Check if the immediate offset is valid as a scaled immediate.
1201static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1202 unsigned Size) {
1203 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1204 Offset < (Range << Log2_32(Size)))
1205 return true;
1206 return false;
1207}
1208
1209/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1210/// immediate" address. The "Size" argument is the size in bytes of the memory
1211/// reference, which determines the scale.
1212bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1213 unsigned BW, unsigned Size,
1214 SDValue &Base,
1215 SDValue &OffImm) {
1216 SDLoc dl(N);
1217 const DataLayout &DL = CurDAG->getDataLayout();
1218 const TargetLowering *TLI = getTargetLowering();
1219 if (N.getOpcode() == ISD::FrameIndex) {
1220 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1221 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1222 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1223 return true;
1224 }
1225
1226 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1227 // selected here doesn't support labels/immediates, only base+offset.
1228 if (CurDAG->isBaseWithConstantOffset(N)) {
1229 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1230 if (IsSignedImm) {
1231 int64_t RHSC = RHS->getSExtValue();
1232 unsigned Scale = Log2_32(Size);
1233 int64_t Range = 0x1LL << (BW - 1);
1234
1235 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1236 RHSC < (Range << Scale)) {
1237 Base = N.getOperand(0);
1238 if (Base.getOpcode() == ISD::FrameIndex) {
1239 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1240 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1241 }
1242 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1243 return true;
1244 }
1245 } else {
1246 // unsigned Immediate
1247 uint64_t RHSC = RHS->getZExtValue();
1248 unsigned Scale = Log2_32(Size);
1249 uint64_t Range = 0x1ULL << BW;
1250
1251 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1252 Base = N.getOperand(0);
1253 if (Base.getOpcode() == ISD::FrameIndex) {
1254 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1255 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1256 }
1257 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1258 return true;
1259 }
1260 }
1261 }
1262 }
1263 // Base only. The address will be materialized into a register before
1264 // the memory is accessed.
1265 // add x0, Xbase, #offset
1266 // stp x1, x2, [x0]
1267 Base = N;
1268 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1269 return true;
1270}
1271
1272/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1273/// immediate" address. The "Size" argument is the size in bytes of the memory
1274/// reference, which determines the scale.
///
/// \param N      The address to match.
/// \param Size   Access size in bytes.
/// \param Base   Receives the base (a TargetFrameIndex when the base is a
///               FrameIndex node).
/// \param OffImm Receives the offset operand.
/// \returns false only when the unscaled addressing mode can handle \p N
///          instead; true in every other case.
1275bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1276 SDValue &Base, SDValue &OffImm) {
1277 SDLoc dl(N);
1278 const DataLayout &DL = CurDAG->getDataLayout();
1279 const TargetLowering *TLI = getTargetLowering();
  // A bare frame index is addressed with a zero offset.
1280 if (N.getOpcode() == ISD::FrameIndex) {
1281 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1282 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1283 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1284 return true;
1285 }
1286
  // ADDlow: use its two operands directly as base and offset. When the second
  // operand is a global address, further conditions on the global apply.
1287 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1288 GlobalAddressSDNode *GAN =
1289 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1290 Base = N.getOperand(0);
1291 OffImm = N.getOperand(1);
1292 if (!GAN)
1293 return true;
1294
1295 if (GAN->getOffset() % Size == 0 &&
1297 return true;
1298 }
1299
  // Base plus constant: accept when the constant is a valid scaled immediate
  // below 0x1000 units of Size.
1300 if (CurDAG->isBaseWithConstantOffset(N)) {
1301 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1302 int64_t RHSC = (int64_t)RHS->getZExtValue();
1303 unsigned Scale = Log2_32(Size);
1304 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1305 Base = N.getOperand(0);
1306 if (Base.getOpcode() == ISD::FrameIndex) {
1307 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1308 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1309 }
1310 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1311 return true;
1312 }
1313 }
1314 }
1315
1316 // Before falling back to our general case, check if the unscaled
1317 // instructions can handle this. If so, that's preferable.
  // Note that this call may overwrite Base and OffImm even though the
  // function then reports failure.
1318 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1319 return false;
1320
1321 // Base only. The address will be materialized into a register before
1322 // the memory is accessed.
1323 // add x0, Xbase, #offset
1324 // ldr x0, [x0]
1325 Base = N;
1326 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1327 return true;
1328}
1329
1330/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1331/// immediate" address. This should only match when there is an offset that
1332/// is not valid for a scaled immediate addressing mode. The "Size" argument
1333/// is the size in bytes of the memory reference, which is needed here to know
1334/// what is valid for a scaled immediate.
1335bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1336 SDValue &Base,
1337 SDValue &OffImm) {
1338 if (!CurDAG->isBaseWithConstantOffset(N))
1339 return false;
1340 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1341 int64_t RHSC = RHS->getSExtValue();
1342 if (RHSC >= -256 && RHSC < 256) {
1343 Base = N.getOperand(0);
1344 if (Base.getOpcode() == ISD::FrameIndex) {
1345 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1346 const TargetLowering *TLI = getTargetLowering();
1347 Base = CurDAG->getTargetFrameIndex(
1348 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1349 }
1350 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1351 return true;
1352 }
1353 }
1354 return false;
1355}
1356
// Builds an i64 value whose sub_32 subregister is N: an IMPLICIT_DEF i64 is
// created and N is inserted into it, so the remaining bits are left
// unspecified.
1358 SDLoc dl(N);
1359 SDValue ImpDef = SDValue(
1360 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1361 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1362 N);
1363}
1364
1365/// Check if the given SHL node (\p N), can be used to form an
1366/// extended register for an addressing mode.
///
/// \param N          The SHL node; any other opcode trips the assertion.
/// \param Size       Access size in bytes; the shift amount must be 0 or
///                   log2(Size).
/// \param WantExtend When true, the shifted value must itself be an extend
///                   accepted by getExtendTypeForNode() in load/store mode.
/// \param Offset     Receives the offset register.
/// \param SignExtend Receives an i32 target constant that is 1 for a SXTW
///                   extend and 0 otherwise.
/// \returns true when the shift can be folded and folding is worthwhile.
1367bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1368 bool WantExtend, SDValue &Offset,
1369 SDValue &SignExtend) {
1370 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  // The shift amount must be a constant in the range [0, 7].
1371 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1372 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1373 return false;
1374
1375 SDLoc dl(N);
1376 if (WantExtend) {
1378 getExtendTypeForNode(N.getOperand(0), true);
1380 return false;
1381
  // Use the value underneath the extend, narrowed to i32 when necessary.
1382 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1383 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1384 MVT::i32);
1385 } else {
1386 Offset = N.getOperand(0);
1387 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1388 }
1389
  // Offset and SignExtend are already written here, so they may be modified
  // even when one of the checks below makes the function return false.
1390 unsigned LegalShiftVal = Log2_32(Size);
1391 unsigned ShiftVal = CSD->getZExtValue();
1392
1393 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1394 return false;
1395
1396 return isWorthFoldingAddr(N, Size);
1397}
1398
/// Select a register-offset address whose offset register is extended: the
/// body matches an ADD where one operand is an extend accepted by
/// getExtendTypeForNode() in load/store mode, optionally under a SHL.
///
/// \param N          The address to match; must be an ADD.
/// \param Size       Access size in bytes.
/// \param SignExtend Receives an i32 target constant that is 1 for a SXTW
///                   extend and 0 otherwise.
/// \param DoShift    Receives an i32 target constant telling whether the
///                   offset is shifted.
/// \returns true when an extended-register form was matched. Output operands
///          may have been written even when false is returned.
1399bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1401 SDValue &SignExtend,
1402 SDValue &DoShift) {
1403 if (N.getOpcode() != ISD::ADD)
1404 return false;
1405 SDValue LHS = N.getOperand(0);
1406 SDValue RHS = N.getOperand(1);
1407 SDLoc dl(N);
1408
1409 // We don't want to match immediate adds here, because they are better lowered
1410 // to the register-immediate addressing modes.
1412 return false;
1413
1414 // Check if this particular node is reused in any non-memory related
1415 // operation. If yes, do not try to fold this node into the address
1416 // computation, since the computation will be kept.
1417 const SDNode *Node = N.getNode();
1418 for (SDNode *UI : Node->users()) {
1419 if (!isMemOpOrPrefetch(UI))
1420 return false;
1421 }
1422
1423 // Remember if it is worth folding N when it produces extended register.
1424 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1425
1426 // Try to match a shifted extend on the RHS.
1427 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1428 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1429 Base = LHS;
1430 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1431 return true;
1432 }
1433
1434 // Try to match a shifted extend on the LHS.
1435 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1436 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1437 Base = RHS;
1438 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1439 return true;
1440 }
1441
1442 // There was no shift, whatever else we find.
1443 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1444
1446 // Try to match an unshifted extend on the LHS.
1447 if (IsExtendedRegisterWorthFolding &&
1448 (Ext = getExtendTypeForNode(LHS, true)) !=
1450 Base = RHS;
1451 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1452 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1453 MVT::i32);
  // The extend itself must also be worth folding; otherwise fall through and
  // try the other operand.
1454 if (isWorthFoldingAddr(LHS, Size))
1455 return true;
1456 }
1457
1458 // Try to match an unshifted extend on the RHS.
1459 if (IsExtendedRegisterWorthFolding &&
1460 (Ext = getExtendTypeForNode(RHS, true)) !=
1462 Base = LHS;
1463 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1464 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1465 MVT::i32);
1466 if (isWorthFoldingAddr(RHS, Size))
1467 return true;
1468 }
1469
1470 return false;
1471}
1472
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = static_cast<uint64_t>(ImmOff);

  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((Bits >> 12) == 0)
    return true;

  // An "ADD LSL #12" can only encode constants whose set bits all lie in
  // bits [12, 23].
  if ((Bits & ~0xfff000ULL) != 0)
    return false;

  // As a single MOVZ is faster than an "ADD of LSL #12", ignore constants a
  // MOVZ can produce: those confined to bits [12, 15] or to bits [16, 23].
  const bool UsesBits12To15 = (Bits & 0x00f000ULL) != 0;
  const bool UsesBits16To23 = (Bits & 0xff0000ULL) != 0;
  return UsesBits12To15 && UsesBits16To23;
}
1487
/// Select a register-offset address with an unextended offset register: the
/// body matches an ADD, optionally with a SHL on one operand, and falls back
/// to a plain register + register form.
///
/// \param N          The address to match; must be an ADD.
/// \param Size       Access size in bytes.
/// \param SignExtend Receives an i32 target constant; this function only ever
///                   stores 0 in it.
/// \param DoShift    Receives an i32 target constant telling whether the
///                   offset is shifted.
/// \returns false when \p N is not an ADD, has a user that is not a memory
///          operation or prefetch, or adds an immediate that is better
///          handled by another addressing mode or a single ADD/SUB; true
///          otherwise.
1488bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1490 SDValue &SignExtend,
1491 SDValue &DoShift) {
1492 if (N.getOpcode() != ISD::ADD)
1493 return false;
1494 SDValue LHS = N.getOperand(0);
1495 SDValue RHS = N.getOperand(1);
1496 SDLoc DL(N);
1497
1498 // Check if this particular node is reused in any non-memory related
1499 // operation. If yes, do not try to fold this node into the address
1500 // computation, since the computation will be kept.
1501 const SDNode *Node = N.getNode();
1502 for (SDNode *UI : Node->users()) {
1503 if (!isMemOpOrPrefetch(UI))
1504 return false;
1505 }
1506
1507 // Watch out if RHS is a wide immediate, it can not be selected into
1508 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1509 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1510 // instructions like:
1511 // MOV X0, WideImmediate
1512 // ADD X1, BaseReg, X0
1513 // LDR X2, [X1, 0]
1514 // For such situation, using [BaseReg, XReg] addressing mode can save one
1515 // ADD/SUB:
1516 // MOV X0, WideImmediate
1517 // LDR X2, [BaseReg, X0]
1518 if (isa<ConstantSDNode>(RHS)) {
1519 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1520 // Skip the immediate can be selected by load/store addressing mode.
1521 // Also skip the immediate can be encoded by a single ADD (SUB is also
1522 // checked by using -ImmOff).
  // NOTE(review): -ImmOff overflows when ImmOff is INT64_MIN — confirm that
  // value cannot reach this point.
1523 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1524 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1525 return false;
1526
  // Materialize the wide immediate with a MOVi64imm node.
1527 SDValue Ops[] = { RHS };
1528 SDNode *MOVI =
1529 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1530 SDValue MOVIV = SDValue(MOVI, 0);
1531 // This ADD of two X register will be selected into [Reg+Reg] mode.
  // NOTE(review): only N is replaced here; LHS and RHS keep referring to the
  // operands of the original ADD, and RHS is what the fallback below stores
  // into Offset — confirm this is intended.
1532 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1533 }
1534
1535 // Remember if it is worth folding N when it produces extended register.
1536 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1537
1538 // Try to match a shifted extend on the RHS.
1539 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1540 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1541 Base = LHS;
1542 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1543 return true;
1544 }
1545
1546 // Try to match a shifted extend on the LHS.
1547 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1548 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1549 Base = RHS;
1550 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1551 return true;
1552 }
1553
1554 // Match any non-shifted, non-extend, non-immediate add expression.
1555 Base = LHS;
1556 Offset = RHS;
1557 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1558 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1559 // Reg1 + Reg2 is free: no check needed.
1560 return true;
1561}
1562
1563SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1564 static const unsigned RegClassIDs[] = {
1565 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1566 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1567 AArch64::dsub2, AArch64::dsub3};
1568
1569 return createTuple(Regs, RegClassIDs, SubRegs);
1570}
1571
1572SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1573 static const unsigned RegClassIDs[] = {
1574 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1575 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1576 AArch64::qsub2, AArch64::qsub3};
1577
1578 return createTuple(Regs, RegClassIDs, SubRegs);
1579}
1580
1581SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1582 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1583 AArch64::ZPR3RegClassID,
1584 AArch64::ZPR4RegClassID};
1585 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1586 AArch64::zsub2, AArch64::zsub3};
1587
1588 return createTuple(Regs, RegClassIDs, SubRegs);
1589}
1590
1591SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1592 assert(Regs.size() == 2 || Regs.size() == 4);
1593
1594 // The createTuple interface requires 3 RegClassIDs for each possible
1595 // tuple type even though we only have them for ZPR2 and ZPR4.
1596 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1597 AArch64::ZPR4Mul4RegClassID};
1598 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1599 AArch64::zsub2, AArch64::zsub3};
1600 return createTuple(Regs, RegClassIDs, SubRegs);
1601}
1602
/// Build a REG_SEQUENCE machine node that groups \p Regs into one untyped
/// tuple value.
///
/// \param Regs        The 1 to 4 values to combine. A single value is
///                    returned unchanged.
/// \param RegClassIDs Register class IDs indexed by (number of registers - 2).
/// \param SubRegs     Subregister index for each position in the tuple.
/// \returns Result 0 of the REG_SEQUENCE node, or Regs[0] for one register.
1603SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1604 const unsigned RegClassIDs[],
1605 const unsigned SubRegs[]) {
1606 // There's no special register-class for a vector-list of 1 element: it's just
1607 // a vector.
1608 if (Regs.size() == 1)
1609 return Regs[0];
1610
1611 assert(Regs.size() >= 2 && Regs.size() <= 4);
1612
1613 SDLoc DL(Regs[0]);
1614
1616
1617 // First operand of REG_SEQUENCE is the desired RegClass.
1618 Ops.push_back(
1619 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1620
1621 // Then we get pairs of source & subregister-position for the components.
1622 for (unsigned i = 0; i < Regs.size(); ++i) {
1623 Ops.push_back(Regs[i]);
1624 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1625 }
1626
1627 SDNode *N =
1628 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1629 return SDValue(N, 0);
1630}
1631
/// Replace \p N with machine node \p Opc whose table operand is a Q-register
/// tuple built from \p NumVecs consecutive operands of \p N.
///
/// \param N       The node being selected.
/// \param NumVecs Number of table vector operands.
/// \param Opc     Machine opcode to emit.
/// \param isExt   When true, operand 1 of \p N is passed through as an extra
///                leading operand and the table vectors start one operand
///                later.
1632void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1633 bool isExt) {
1634 SDLoc dl(N);
1635 EVT VT = N->getValueType(0);
1636
  // isExt converts to 0 or 1: the number of operands before the table.
1637 unsigned ExtOff = isExt;
1638
1639 // Form a REG_SEQUENCE to force register allocation.
1640 unsigned Vec0Off = ExtOff + 1;
1641 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1642 SDValue RegSeq = createQTuple(Regs);
1643
1645 if (isExt)
1646 Ops.push_back(N->getOperand(1));
1647 Ops.push_back(RegSeq);
  // The operand following the table vectors is passed through last.
1648 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1649 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1650}
1651
// Split a pointer-authentication discriminator into a constant part and an
// address part.
//
// Returns the tuple (constant discriminator, address discriminator):
//  - If the constant part is a constant that fits in 16 bits, the first
//    element is that value as an i64 target constant and the second is the
//    address discriminator, or the XZR register when there is none.
//  - Otherwise the first element is the target constant 0 and the second is
//    the whole, unmodified Disc.
1652static std::tuple<SDValue, SDValue>
1654 SDLoc DL(Disc);
1655 SDValue AddrDisc;
1656 SDValue ConstDisc;
1657
1658 // If this is a blend, remember the constant and address discriminators.
1659 // Otherwise, it's either a constant discriminator, or a non-blended
1660 // address discriminator.
1661 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1662 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1663 AddrDisc = Disc->getOperand(1);
1664 ConstDisc = Disc->getOperand(2);
1665 } else {
1666 ConstDisc = Disc;
1667 }
1668
1669 // If the constant discriminator (either the blend RHS, or the entire
1670 // discriminator value) isn't a 16-bit constant, bail out, and let the
1671 // discriminator be computed separately.
1672 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1673 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1674 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1675
1676 // If there's no address discriminator, use XZR directly.
1677 if (!AddrDisc)
1678 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1679
1680 return std::make_tuple(
1681 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1682 AddrDisc);
1683}
1684
1685void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1686 SDLoc DL(N);
1687 // IntrinsicID is operand #0
1688 SDValue Val = N->getOperand(1);
1689 SDValue AUTKey = N->getOperand(2);
1690 SDValue AUTDisc = N->getOperand(3);
1691
1692 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1693 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1694
1695 SDValue AUTAddrDisc, AUTConstDisc;
1696 std::tie(AUTConstDisc, AUTAddrDisc) =
1697 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1698
1699 if (!Subtarget->isX16X17Safer()) {
1700 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1701 // Copy deactivation symbol if present.
1702 if (N->getNumOperands() > 4)
1703 Ops.push_back(N->getOperand(4));
1704
1705 SDNode *AUT =
1706 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1707 ReplaceNode(N, AUT);
1708 } else {
1709 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1710 AArch64::X16, Val, SDValue());
1711 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1712
1713 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1714 ReplaceNode(N, AUT);
1715 }
1716}
1717
1718void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1719 SDLoc DL(N);
1720 // IntrinsicID is operand #0, if W_CHAIN it is #1
1721 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1722 SDValue Val = N->getOperand(OffsetBase + 1);
1723 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1724 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1725 SDValue PACKey = N->getOperand(OffsetBase + 4);
1726 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1727 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1728 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1729
1730 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1731 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1732
1733 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1734 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1735
1736 SDValue AUTAddrDisc, AUTConstDisc;
1737 std::tie(AUTConstDisc, AUTAddrDisc) =
1738 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1739
1740 SDValue PACAddrDisc, PACConstDisc;
1741 std::tie(PACConstDisc, PACAddrDisc) =
1742 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1743
1744 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1745 AArch64::X16, Val, SDValue());
1746
1747 if (HasLoad) {
1748 SDValue Addend = N->getOperand(OffsetBase + 6);
1749 SDValue IncomingChain = N->getOperand(0);
1750 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1751 PACKey, PACConstDisc, PACAddrDisc,
1752 Addend, IncomingChain, X16Copy.getValue(1)};
1753
1754 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1755 MVT::i64, MVT::Other, Ops);
1756 ReplaceNode(N, AUTRELLOADPAC);
1757 } else {
1758 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1759 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1760
1761 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1762 ReplaceNode(N, AUTPAC);
1763 }
1764}
1765
1766bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1767 LoadSDNode *LD = cast<LoadSDNode>(N);
1768 if (LD->isUnindexed())
1769 return false;
1770 EVT VT = LD->getMemoryVT();
1771 EVT DstVT = N->getValueType(0);
1772 ISD::MemIndexedMode AM = LD->getAddressingMode();
1773 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1774 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1775 int OffsetVal = (int)OffsetOp->getZExtValue();
1776
1777 // We're not doing validity checking here. That was done when checking
1778 // if we should mark the load as indexed or not. We're just selecting
1779 // the right instruction.
1780 unsigned Opcode = 0;
1781
1782 ISD::LoadExtType ExtType = LD->getExtensionType();
1783 bool InsertTo64 = false;
1784 if (VT == MVT::i64)
1785 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1786 else if (VT == MVT::i32) {
1787 if (ExtType == ISD::NON_EXTLOAD)
1788 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1789 else if (ExtType == ISD::SEXTLOAD)
1790 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1791 else {
1792 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1793 InsertTo64 = true;
1794 // The result of the load is only i32. It's the subreg_to_reg that makes
1795 // it into an i64.
1796 DstVT = MVT::i32;
1797 }
1798 } else if (VT == MVT::i16) {
1799 if (ExtType == ISD::SEXTLOAD) {
1800 if (DstVT == MVT::i64)
1801 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1802 else
1803 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1804 } else {
1805 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1806 InsertTo64 = DstVT == MVT::i64;
1807 // The result of the load is only i32. It's the subreg_to_reg that makes
1808 // it into an i64.
1809 DstVT = MVT::i32;
1810 }
1811 } else if (VT == MVT::i8) {
1812 if (ExtType == ISD::SEXTLOAD) {
1813 if (DstVT == MVT::i64)
1814 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1815 else
1816 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1817 } else {
1818 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1819 InsertTo64 = DstVT == MVT::i64;
1820 // The result of the load is only i32. It's the subreg_to_reg that makes
1821 // it into an i64.
1822 DstVT = MVT::i32;
1823 }
1824 } else if (VT == MVT::f16) {
1825 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1826 } else if (VT == MVT::bf16) {
1827 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1828 } else if (VT == MVT::f32) {
1829 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1830 } else if (VT == MVT::f64 ||
1831 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1832 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1833 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1834 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1835 } else if (VT.is64BitVector()) {
1836 if (IsPre || OffsetVal != 8)
1837 return false;
1838 switch (VT.getScalarSizeInBits()) {
1839 case 8:
1840 Opcode = AArch64::LD1Onev8b_POST;
1841 break;
1842 case 16:
1843 Opcode = AArch64::LD1Onev4h_POST;
1844 break;
1845 case 32:
1846 Opcode = AArch64::LD1Onev2s_POST;
1847 break;
1848 case 64:
1849 Opcode = AArch64::LD1Onev1d_POST;
1850 break;
1851 default:
1852 llvm_unreachable("Expected vector element to be a power of 2");
1853 }
1854 } else if (VT.is128BitVector()) {
1855 if (IsPre || OffsetVal != 16)
1856 return false;
1857 switch (VT.getScalarSizeInBits()) {
1858 case 8:
1859 Opcode = AArch64::LD1Onev16b_POST;
1860 break;
1861 case 16:
1862 Opcode = AArch64::LD1Onev8h_POST;
1863 break;
1864 case 32:
1865 Opcode = AArch64::LD1Onev4s_POST;
1866 break;
1867 case 64:
1868 Opcode = AArch64::LD1Onev2d_POST;
1869 break;
1870 default:
1871 llvm_unreachable("Expected vector element to be a power of 2");
1872 }
1873 } else
1874 return false;
1875 SDValue Chain = LD->getChain();
1876 SDValue Base = LD->getBasePtr();
1877 SDLoc dl(N);
1878 // LD1 encodes an immediate offset by using XZR as the offset register.
1879 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1880 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1881 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1882 SDValue Ops[] = { Base, Offset, Chain };
1883 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1884 MVT::Other, Ops);
1885
1886 // Transfer memoperands.
1887 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1888 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1889
1890 // Either way, we're replacing the node, so tell the caller that.
1891 SDValue LoadedVal = SDValue(Res, 1);
1892 if (InsertTo64) {
1893 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1894 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1895 MVT::i64, LoadedVal, SubReg),
1896 0);
1897 }
1898
1899 ReplaceUses(SDValue(N, 0), LoadedVal);
1900 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1901 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1902 CurDAG->RemoveDeadNode(N);
1903 return true;
1904}
1905
1906void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1907 unsigned SubRegIdx) {
1908 SDLoc dl(N);
1909 EVT VT = N->getValueType(0);
1910 SDValue Chain = N->getOperand(0);
1911
1912 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1913 Chain};
1914
1915 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1916
1917 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1918 SDValue SuperReg = SDValue(Ld, 0);
1919 for (unsigned i = 0; i < NumVecs; ++i)
1920 ReplaceUses(SDValue(N, i),
1921 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1922
1923 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1924
1925 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1926 // because it's too simple to have needed special treatment during lowering.
1927 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1928 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1929 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1930 }
1931
1932 CurDAG->RemoveDeadNode(N);
1933}
1934
1935void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1936 unsigned Opc, unsigned SubRegIdx) {
1937 SDLoc dl(N);
1938 EVT VT = N->getValueType(0);
1939 SDValue Chain = N->getOperand(0);
1940
1941 SDValue Ops[] = {N->getOperand(1), // Mem operand
1942 N->getOperand(2), // Incremental
1943 Chain};
1944
1945 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1946 MVT::Untyped, MVT::Other};
1947
1948 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1949
1950 // Update uses of write back register
1951 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1952
1953 // Update uses of vector list
1954 SDValue SuperReg = SDValue(Ld, 1);
1955 if (NumVecs == 1)
1956 ReplaceUses(SDValue(N, 0), SuperReg);
1957 else
1958 for (unsigned i = 0; i < NumVecs; ++i)
1959 ReplaceUses(SDValue(N, i),
1960 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1961
1962 // Transfer memoperands.
1963 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1964 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1965
1966 // Update the chain
1967 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1968 CurDAG->RemoveDeadNode(N);
1969}
1970
1971/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1972/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1973/// new Base and an SDValue representing the new offset.
1974std::tuple<unsigned, SDValue, SDValue>
1975AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1976 unsigned Opc_ri,
1977 const SDValue &OldBase,
1978 const SDValue &OldOffset,
1979 unsigned Scale) {
1980 SDValue NewBase = OldBase;
1981 SDValue NewOffset = OldOffset;
1982 // Detect a possible Reg+Imm addressing mode.
1983 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1984 N, OldBase, NewBase, NewOffset);
1985
1986 // Detect a possible reg+reg addressing mode, but only if we haven't already
1987 // detected a Reg+Imm one.
1988 const bool IsRegReg =
1989 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1990
1991 // Select the instruction.
1992 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1993}
1994
1995enum class SelectTypeKind {
1996 Int1 = 0,
1997 Int = 1,
1998 FP = 2,
2000};
2001
2002/// This function selects an opcode from a list of opcodes, which is
2003/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
2004/// element types, in this order.
2005template <SelectTypeKind Kind>
2006static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
2007 // Only match scalable vector VTs
2008 if (!VT.isScalableVector())
2009 return 0;
2010
2011 EVT EltVT = VT.getVectorElementType();
2012 unsigned Key = VT.getVectorMinNumElements();
2013 switch (Kind) {
2015 break;
2017 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2018 EltVT != MVT::i64)
2019 return 0;
2020 break;
2022 if (EltVT != MVT::i1)
2023 return 0;
2024 break;
2025 case SelectTypeKind::FP:
2026 if (EltVT == MVT::bf16)
2027 Key = 16;
2028 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2029 EltVT != MVT::f64)
2030 return 0;
2031 break;
2032 }
2033
2034 unsigned Offset;
2035 switch (Key) {
2036 case 16: // 8-bit or bf16
2037 Offset = 0;
2038 break;
2039 case 8: // 16-bit
2040 Offset = 1;
2041 break;
2042 case 4: // 32-bit
2043 Offset = 2;
2044 break;
2045 case 2: // 64-bit
2046 Offset = 3;
2047 break;
2048 default:
2049 return 0;
2050 }
2051
2052 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2053}
2054
2055// This function is almost identical to SelectWhilePair, but has an
2056// extra check on the range of the immediate operand.
2057// TODO: Merge these two functions together at some point?
2058void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2059 // Immediate can be either 0 or 1.
2060 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2061 if (Imm->getZExtValue() > 1)
2062 return;
2063
2064 SDLoc DL(N);
2065 EVT VT = N->getValueType(0);
2066 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2067 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2068 SDValue SuperReg = SDValue(WhilePair, 0);
2069
2070 for (unsigned I = 0; I < 2; ++I)
2071 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2072 AArch64::psub0 + I, DL, VT, SuperReg));
2073
2074 CurDAG->RemoveDeadNode(N);
2075}
2076
2077void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2078 SDLoc DL(N);
2079 EVT VT = N->getValueType(0);
2080
2081 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2082
2083 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2084 SDValue SuperReg = SDValue(WhilePair, 0);
2085
2086 for (unsigned I = 0; I < 2; ++I)
2087 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2088 AArch64::psub0 + I, DL, VT, SuperReg));
2089
2090 CurDAG->RemoveDeadNode(N);
2091}
2092
2093void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2094 unsigned Opcode) {
2095 EVT VT = N->getValueType(0);
2096 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2097 SDValue Ops = createZTuple(Regs);
2098 SDLoc DL(N);
2099 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2100 SDValue SuperReg = SDValue(Intrinsic, 0);
2101 for (unsigned i = 0; i < NumVecs; ++i)
2102 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2103 AArch64::zsub0 + i, DL, VT, SuperReg));
2104
2105 CurDAG->RemoveDeadNode(N);
2106}
2107
2108void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2109 unsigned Opcode) {
2110 SDLoc DL(N);
2111 EVT VT = N->getValueType(0);
2112 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2113 Ops.push_back(/*Chain*/ N->getOperand(0));
2114
2115 SDNode *Instruction =
2116 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2117 SDValue SuperReg = SDValue(Instruction, 0);
2118
2119 for (unsigned i = 0; i < NumVecs; ++i)
2120 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2121 AArch64::zsub0 + i, DL, VT, SuperReg));
2122
2123 // Copy chain
2124 unsigned ChainIdx = NumVecs;
2125 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2126 CurDAG->RemoveDeadNode(N);
2127}
2128
2129void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2130 unsigned NumVecs,
2131 bool IsZmMulti,
2132 unsigned Opcode,
2133 bool HasPred) {
2134 assert(Opcode != 0 && "Unexpected opcode");
2135
2136 SDLoc DL(N);
2137 EVT VT = N->getValueType(0);
2138 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2140
2141 auto GetMultiVecOperand = [&]() {
2142 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2143 OpsIter += NumVecs;
2144 return createZMulTuple(Regs);
2145 };
2146
2147 if (HasPred)
2148 Ops.push_back(*OpsIter++);
2149
2150 Ops.push_back(GetMultiVecOperand());
2151 if (IsZmMulti)
2152 Ops.push_back(GetMultiVecOperand());
2153 else
2154 Ops.push_back(*OpsIter++);
2155
2156 // Append any remaining operands.
2157 Ops.append(OpsIter, N->op_end());
2158 SDNode *Intrinsic;
2159 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2160 SDValue SuperReg = SDValue(Intrinsic, 0);
2161 for (unsigned i = 0; i < NumVecs; ++i)
2162 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2163 AArch64::zsub0 + i, DL, VT, SuperReg));
2164
2165 CurDAG->RemoveDeadNode(N);
2166}
2167
2168void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2169 unsigned Scale, unsigned Opc_ri,
2170 unsigned Opc_rr, bool IsIntr) {
2171 assert(Scale < 5 && "Invalid scaling value.");
2172 SDLoc DL(N);
2173 EVT VT = N->getValueType(0);
2174 SDValue Chain = N->getOperand(0);
2175
2176 // Optimize addressing mode.
2178 unsigned Opc;
2179 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2180 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2181 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2182
2183 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2184 Base, // Memory operand
2185 Offset, Chain};
2186
2187 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2188
2189 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2190 SDValue SuperReg = SDValue(Load, 0);
2191 for (unsigned i = 0; i < NumVecs; ++i)
2192 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2193 AArch64::zsub0 + i, DL, VT, SuperReg));
2194
2195 // Copy chain
2196 unsigned ChainIdx = NumVecs;
2197 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2198 CurDAG->RemoveDeadNode(N);
2199}
2200
2201void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2202 unsigned NumVecs,
2203 unsigned Scale,
2204 unsigned Opc_ri,
2205 unsigned Opc_rr) {
2206 assert(Scale < 4 && "Invalid scaling value.");
2207 SDLoc DL(N);
2208 EVT VT = N->getValueType(0);
2209 SDValue Chain = N->getOperand(0);
2210
2211 SDValue PNg = N->getOperand(2);
2212 SDValue Base = N->getOperand(3);
2213 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2214 unsigned Opc;
2215 std::tie(Opc, Base, Offset) =
2216 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2217
2218 SDValue Ops[] = {PNg, // Predicate-as-counter
2219 Base, // Memory operand
2220 Offset, Chain};
2221
2222 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2223
2224 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2225 SDValue SuperReg = SDValue(Load, 0);
2226 for (unsigned i = 0; i < NumVecs; ++i)
2227 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2228 AArch64::zsub0 + i, DL, VT, SuperReg));
2229
2230 // Copy chain
2231 unsigned ChainIdx = NumVecs;
2232 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2233 CurDAG->RemoveDeadNode(N);
2234}
2235
2236void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2237 unsigned Opcode) {
2238 if (N->getValueType(0) != MVT::nxv4f32)
2239 return;
2240 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2241}
2242
2243void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2244 unsigned NumOutVecs,
2245 unsigned Opc,
2246 uint32_t MaxImm) {
2247 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2248 if (Imm->getZExtValue() > MaxImm)
2249 return;
2250
2251 SDValue ZtValue;
2252 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2253 return;
2254
2255 SDValue Chain = Node->getOperand(0);
2256 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2257 SDLoc DL(Node);
2258 EVT VT = Node->getValueType(0);
2259
2260 SDNode *Instruction =
2261 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2262 SDValue SuperReg = SDValue(Instruction, 0);
2263
2264 for (unsigned I = 0; I < NumOutVecs; ++I)
2265 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2266 AArch64::zsub0 + I, DL, VT, SuperReg));
2267
2268 // Copy chain
2269 unsigned ChainIdx = NumOutVecs;
2270 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2271 CurDAG->RemoveDeadNode(Node);
2272}
2273
2274void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2275 unsigned NumOutVecs,
2276 unsigned Opc) {
2277 SDValue ZtValue;
2278 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2279 return;
2280
2281 SDValue Chain = Node->getOperand(0);
2282 SDValue Ops[] = {ZtValue,
2283 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2284 Chain};
2285
2286 SDLoc DL(Node);
2287 EVT VT = Node->getValueType(0);
2288
2289 SDNode *Instruction =
2290 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2291 SDValue SuperReg = SDValue(Instruction, 0);
2292
2293 for (unsigned I = 0; I < NumOutVecs; ++I)
2294 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2295 AArch64::zsub0 + I, DL, VT, SuperReg));
2296
2297 // Copy chain
2298 unsigned ChainIdx = NumOutVecs;
2299 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2300 CurDAG->RemoveDeadNode(Node);
2301}
2302
2303void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2304 unsigned Op) {
2305 SDLoc DL(N);
2306 EVT VT = N->getValueType(0);
2307
2308 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2309 SDValue Zd = createZMulTuple(Regs);
2310 SDValue Zn = N->getOperand(1 + NumVecs);
2311 SDValue Zm = N->getOperand(2 + NumVecs);
2312
2313 SDValue Ops[] = {Zd, Zn, Zm};
2314
2315 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2316 SDValue SuperReg = SDValue(Intrinsic, 0);
2317 for (unsigned i = 0; i < NumVecs; ++i)
2318 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2319 AArch64::zsub0 + i, DL, VT, SuperReg));
2320
2321 CurDAG->RemoveDeadNode(N);
2322}
2323
2324bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2325 switch (BaseReg) {
2326 default:
2327 return false;
2328 case AArch64::ZA:
2329 case AArch64::ZAB0:
2330 if (TileNum == 0)
2331 break;
2332 return false;
2333 case AArch64::ZAH0:
2334 if (TileNum <= 1)
2335 break;
2336 return false;
2337 case AArch64::ZAS0:
2338 if (TileNum <= 3)
2339 break;
2340 return false;
2341 case AArch64::ZAD0:
2342 if (TileNum <= 7)
2343 break;
2344 return false;
2345 }
2346
2347 BaseReg += TileNum;
2348 return true;
2349}
2350
2351template <unsigned MaxIdx, unsigned Scale>
2352void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2353 unsigned BaseReg, unsigned Op) {
2354 unsigned TileNum = 0;
2355 if (BaseReg != AArch64::ZA)
2356 TileNum = N->getConstantOperandVal(2);
2357
2358 if (!SelectSMETile(BaseReg, TileNum))
2359 return;
2360
2361 SDValue SliceBase, Base, Offset;
2362 if (BaseReg == AArch64::ZA)
2363 SliceBase = N->getOperand(2);
2364 else
2365 SliceBase = N->getOperand(3);
2366
2367 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2368 return;
2369
2370 SDLoc DL(N);
2371 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2372 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2373 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2374
2375 EVT VT = N->getValueType(0);
2376 for (unsigned I = 0; I < NumVecs; ++I)
2377 ReplaceUses(SDValue(N, I),
2378 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2379 SDValue(Mov, 0)));
2380 // Copy chain
2381 unsigned ChainIdx = NumVecs;
2382 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2383 CurDAG->RemoveDeadNode(N);
2384}
2385
2386void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2387 unsigned Op, unsigned MaxIdx,
2388 unsigned Scale, unsigned BaseReg) {
2389 // Slice can be in different positions
2390 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2391 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2392 SDValue SliceBase = N->getOperand(2);
2393 if (BaseReg != AArch64::ZA)
2394 SliceBase = N->getOperand(3);
2395
2397 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2398 return;
2399 // The correct Za tile number is computed in Machine Instruction
2400 // See EmitZAInstr
2401 // DAG cannot select Za tile as an output register with ZReg
2402 SDLoc DL(N);
2404 if (BaseReg != AArch64::ZA )
2405 Ops.push_back(N->getOperand(2));
2406 Ops.push_back(Base);
2407 Ops.push_back(Offset);
2408 Ops.push_back(N->getOperand(0)); //Chain
2409 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2410
2411 EVT VT = N->getValueType(0);
2412 for (unsigned I = 0; I < NumVecs; ++I)
2413 ReplaceUses(SDValue(N, I),
2414 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2415 SDValue(Mov, 0)));
2416
2417 // Copy chain
2418 unsigned ChainIdx = NumVecs;
2419 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2420 CurDAG->RemoveDeadNode(N);
2421}
2422
2423void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2424 unsigned NumOutVecs,
2425 bool IsTupleInput,
2426 unsigned Opc) {
2427 SDLoc DL(N);
2428 EVT VT = N->getValueType(0);
2429 unsigned NumInVecs = N->getNumOperands() - 1;
2430
2432 if (IsTupleInput) {
2433 assert((NumInVecs == 2 || NumInVecs == 4) &&
2434 "Don't know how to handle multi-register input!");
2435 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2436 Ops.push_back(createZMulTuple(Regs));
2437 } else {
2438 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2439 for (unsigned I = 0; I < NumInVecs; I++)
2440 Ops.push_back(N->getOperand(1 + I));
2441 }
2442
2443 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2444 SDValue SuperReg = SDValue(Res, 0);
2445
2446 for (unsigned I = 0; I < NumOutVecs; I++)
2447 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2448 AArch64::zsub0 + I, DL, VT, SuperReg));
2449 CurDAG->RemoveDeadNode(N);
2450}
2451
2452void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2453 unsigned Opc) {
2454 SDLoc dl(N);
2455 EVT VT = N->getOperand(2)->getValueType(0);
2456
2457 // Form a REG_SEQUENCE to force register allocation.
2458 bool Is128Bit = VT.getSizeInBits() == 128;
2459 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2460 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2461
2462 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2463 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2464
2465 // Transfer memoperands.
2466 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2467 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2468
2469 ReplaceNode(N, St);
2470}
2471
2472void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2473 unsigned Scale, unsigned Opc_rr,
2474 unsigned Opc_ri) {
2475 SDLoc dl(N);
2476
2477 // Form a REG_SEQUENCE to force register allocation.
2478 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2479 SDValue RegSeq = createZTuple(Regs);
2480
2481 // Optimize addressing mode.
2482 unsigned Opc;
2484 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2485 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2486 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2487
2488 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2489 Base, // address
2490 Offset, // offset
2491 N->getOperand(0)}; // chain
2492 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2493
2494 ReplaceNode(N, St);
2495}
2496
2497bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2498 SDValue &OffImm) {
2499 SDLoc dl(N);
2500 const DataLayout &DL = CurDAG->getDataLayout();
2501 const TargetLowering *TLI = getTargetLowering();
2502
2503 // Try to match it for the frame address
2504 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2505 int FI = FINode->getIndex();
2506 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2507 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2508 return true;
2509 }
2510
2511 return false;
2512}
2513
2514void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2515 unsigned Opc) {
2516 SDLoc dl(N);
2517 EVT VT = N->getOperand(2)->getValueType(0);
2518 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2519 MVT::Other}; // Type for the Chain
2520
2521 // Form a REG_SEQUENCE to force register allocation.
2522 bool Is128Bit = VT.getSizeInBits() == 128;
2523 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2524 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2525
2526 SDValue Ops[] = {RegSeq,
2527 N->getOperand(NumVecs + 1), // base register
2528 N->getOperand(NumVecs + 2), // Incremental
2529 N->getOperand(0)}; // Chain
2530 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2531
2532 // Transfer memoperands.
2533 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2534 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2535
2536 ReplaceNode(N, St);
2537}
2538
2539namespace {
2540/// WidenVector - Given a value in the V64 register class, produce the
2541/// equivalent value in the V128 register class.
2542class WidenVector {
2543 SelectionDAG &DAG;
2544
2545public:
2546 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2547
2548 SDValue operator()(SDValue V64Reg) {
2549 EVT VT = V64Reg.getValueType();
2550 unsigned NarrowSize = VT.getVectorNumElements();
2551 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2552 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2553 SDLoc DL(V64Reg);
2554
2555 SDValue Undef =
2556 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2557 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2558 }
2559};
2560} // namespace
2561
2562/// NarrowVector - Given a value in the V128 register class, produce the
2563/// equivalent value in the V64 register class.
2565 EVT VT = V128Reg.getValueType();
2566 unsigned WideSize = VT.getVectorNumElements();
2567 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2568 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2569
2570 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2571 V128Reg);
2572}
2573
2574void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2575 unsigned Opc) {
2576 SDLoc dl(N);
2577 EVT VT = N->getValueType(0);
2578 bool Narrow = VT.getSizeInBits() == 64;
2579
2580 // Form a REG_SEQUENCE to force register allocation.
2581 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2582
2583 if (Narrow)
2584 transform(Regs, Regs.begin(),
2585 WidenVector(*CurDAG));
2586
2587 SDValue RegSeq = createQTuple(Regs);
2588
2589 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2590
2591 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2592
2593 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2594 N->getOperand(NumVecs + 3), N->getOperand(0)};
2595 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2596 SDValue SuperReg = SDValue(Ld, 0);
2597
2598 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2599 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2600 AArch64::qsub2, AArch64::qsub3 };
2601 for (unsigned i = 0; i < NumVecs; ++i) {
2602 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2603 if (Narrow)
2604 NV = NarrowVector(NV, *CurDAG);
2605 ReplaceUses(SDValue(N, i), NV);
2606 }
2607
2608 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2609 CurDAG->RemoveDeadNode(N);
2610}
2611
2612void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2613 unsigned Opc) {
2614 SDLoc dl(N);
2615 EVT VT = N->getValueType(0);
2616 bool Narrow = VT.getSizeInBits() == 64;
2617
2618 // Form a REG_SEQUENCE to force register allocation.
2619 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2620
2621 if (Narrow)
2622 transform(Regs, Regs.begin(),
2623 WidenVector(*CurDAG));
2624
2625 SDValue RegSeq = createQTuple(Regs);
2626
2627 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2628 RegSeq->getValueType(0), MVT::Other};
2629
2630 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2631
2632 SDValue Ops[] = {RegSeq,
2633 CurDAG->getTargetConstant(LaneNo, dl,
2634 MVT::i64), // Lane Number
2635 N->getOperand(NumVecs + 2), // Base register
2636 N->getOperand(NumVecs + 3), // Incremental
2637 N->getOperand(0)};
2638 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2639
2640 // Update uses of the write back register
2641 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2642
2643 // Update uses of the vector list
2644 SDValue SuperReg = SDValue(Ld, 1);
2645 if (NumVecs == 1) {
2646 ReplaceUses(SDValue(N, 0),
2647 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2648 } else {
2649 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2650 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2651 AArch64::qsub2, AArch64::qsub3 };
2652 for (unsigned i = 0; i < NumVecs; ++i) {
2653 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2654 SuperReg);
2655 if (Narrow)
2656 NV = NarrowVector(NV, *CurDAG);
2657 ReplaceUses(SDValue(N, i), NV);
2658 }
2659 }
2660
2661 // Update the Chain
2662 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2663 CurDAG->RemoveDeadNode(N);
2664}
2665
2666void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2667 unsigned Opc) {
2668 SDLoc dl(N);
2669 EVT VT = N->getOperand(2)->getValueType(0);
2670 bool Narrow = VT.getSizeInBits() == 64;
2671
2672 // Form a REG_SEQUENCE to force register allocation.
2673 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2674
2675 if (Narrow)
2676 transform(Regs, Regs.begin(),
2677 WidenVector(*CurDAG));
2678
2679 SDValue RegSeq = createQTuple(Regs);
2680
2681 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2682
2683 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2684 N->getOperand(NumVecs + 3), N->getOperand(0)};
2685 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2686
2687 // Transfer memoperands.
2688 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2689 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2690
2691 ReplaceNode(N, St);
2692}
2693
2694void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2695 unsigned Opc) {
2696 SDLoc dl(N);
2697 EVT VT = N->getOperand(2)->getValueType(0);
2698 bool Narrow = VT.getSizeInBits() == 64;
2699
2700 // Form a REG_SEQUENCE to force register allocation.
2701 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2702
2703 if (Narrow)
2704 transform(Regs, Regs.begin(),
2705 WidenVector(*CurDAG));
2706
2707 SDValue RegSeq = createQTuple(Regs);
2708
2709 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2710 MVT::Other};
2711
2712 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2713
2714 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2715 N->getOperand(NumVecs + 2), // Base Register
2716 N->getOperand(NumVecs + 3), // Incremental
2717 N->getOperand(0)};
2718 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2719
2720 // Transfer memoperands.
2721 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2722 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2723
2724 ReplaceNode(N, St);
2725}
2726
2728 unsigned &Opc, SDValue &Opd0,
2729 unsigned &LSB, unsigned &MSB,
2730 unsigned NumberOfIgnoredLowBits,
2731 bool BiggerPattern) {
2732 assert(N->getOpcode() == ISD::AND &&
2733 "N must be a AND operation to call this function");
2734
2735 EVT VT = N->getValueType(0);
2736
2737 // Here we can test the type of VT and return false when the type does not
2738 // match, but since it is done prior to that call in the current context
2739 // we turned that into an assert to avoid redundant code.
2740 assert((VT == MVT::i32 || VT == MVT::i64) &&
2741 "Type checking must have been done before calling this function");
2742
2743 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2744 // changed the AND node to a 32-bit mask operation. We'll have to
2745 // undo that as part of the transform here if we want to catch all
2746 // the opportunities.
2747 // Currently the NumberOfIgnoredLowBits argument helps to recover
2748 // from these situations when matching bigger pattern (bitfield insert).
2749
2750 // For unsigned extracts, check for a shift right and mask
2751 uint64_t AndImm = 0;
2752 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2753 return false;
2754
2755 const SDNode *Op0 = N->getOperand(0).getNode();
2756
2757 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2758 // simplified. Try to undo that
2759 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2760
2761 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2762 if (AndImm & (AndImm + 1))
2763 return false;
2764
2765 bool ClampMSB = false;
2766 uint64_t SrlImm = 0;
2767 // Handle the SRL + ANY_EXTEND case.
2768 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2769 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2770 // Extend the incoming operand of the SRL to 64-bit.
2771 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2772 // Make sure to clamp the MSB so that we preserve the semantics of the
2773 // original operations.
2774 ClampMSB = true;
2775 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2777 SrlImm)) {
2778 // If the shift result was truncated, we can still combine them.
2779 Opd0 = Op0->getOperand(0).getOperand(0);
2780
2781 // Use the type of SRL node.
2782 VT = Opd0->getValueType(0);
2783 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2784 Opd0 = Op0->getOperand(0);
2785 ClampMSB = (VT == MVT::i32);
2786 } else if (BiggerPattern) {
2787 // Let's pretend a 0 shift right has been performed.
2788 // The resulting code will be at least as good as the original one
2789 // plus it may expose more opportunities for bitfield insert pattern.
2790 // FIXME: Currently we limit this to the bigger pattern, because
2791 // some optimizations expect AND and not UBFM.
2792 Opd0 = N->getOperand(0);
2793 } else
2794 return false;
2795
2796 // Bail out on large immediates. This happens when no proper
2797 // combining/constant folding was performed.
2798 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2799 LLVM_DEBUG(
2800 (dbgs() << N
2801 << ": Found large shift immediate, this should not happen\n"));
2802 return false;
2803 }
2804
2805 LSB = SrlImm;
2806 MSB = SrlImm +
2807 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2808 : llvm::countr_one<uint64_t>(AndImm)) -
2809 1;
2810 if (ClampMSB)
2811 // Since we're moving the extend before the right shift operation, we need
2812 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2813 // the zeros which would get shifted in with the original right shift
2814 // operation.
2815 MSB = MSB > 31 ? 31 : MSB;
2816
2817 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2818 return true;
2819}
2820
2822 SDValue &Opd0, unsigned &Immr,
2823 unsigned &Imms) {
2824 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2825
2826 EVT VT = N->getValueType(0);
2827 unsigned BitWidth = VT.getSizeInBits();
2828 assert((VT == MVT::i32 || VT == MVT::i64) &&
2829 "Type checking must have been done before calling this function");
2830
2831 SDValue Op = N->getOperand(0);
2832 if (Op->getOpcode() == ISD::TRUNCATE) {
2833 Op = Op->getOperand(0);
2834 VT = Op->getValueType(0);
2835 BitWidth = VT.getSizeInBits();
2836 }
2837
2838 uint64_t ShiftImm;
2839 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2840 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2841 return false;
2842
2843 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2844 if (ShiftImm + Width > BitWidth)
2845 return false;
2846
2847 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2848 Opd0 = Op.getOperand(0);
2849 Immr = ShiftImm;
2850 Imms = ShiftImm + Width - 1;
2851 return true;
2852}
2853
2855 SDValue &Opd0, unsigned &LSB,
2856 unsigned &MSB) {
2857 // We are looking for the following pattern which basically extracts several
2858 // continuous bits from the source value and places it from the LSB of the
2859 // destination value, all other bits of the destination value or set to zero:
2860 //
2861 // Value2 = AND Value, MaskImm
2862 // SRL Value2, ShiftImm
2863 //
2864 // with MaskImm >> ShiftImm to search for the bit width.
2865 //
2866 // This gets selected into a single UBFM:
2867 //
2868 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2869 //
2870
2871 if (N->getOpcode() != ISD::SRL)
2872 return false;
2873
2874 uint64_t AndMask = 0;
2875 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2876 return false;
2877
2878 Opd0 = N->getOperand(0).getOperand(0);
2879
2880 uint64_t SrlImm = 0;
2881 if (!isIntImmediate(N->getOperand(1), SrlImm))
2882 return false;
2883
2884 // Check whether we really have several bits extract here.
2885 if (!isMask_64(AndMask >> SrlImm))
2886 return false;
2887
2888 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2889 LSB = SrlImm;
2890 MSB = llvm::Log2_64(AndMask);
2891 return true;
2892}
2893
2894static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2895 unsigned &Immr, unsigned &Imms,
2896 bool BiggerPattern) {
2897 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2898 "N must be a SHR/SRA operation to call this function");
2899
2900 EVT VT = N->getValueType(0);
2901
2902 // Here we can test the type of VT and return false when the type does not
2903 // match, but since it is done prior to that call in the current context
2904 // we turned that into an assert to avoid redundant code.
2905 assert((VT == MVT::i32 || VT == MVT::i64) &&
2906 "Type checking must have been done before calling this function");
2907
2908 // Check for AND + SRL doing several bits extract.
2909 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2910 return true;
2911
2912 // We're looking for a shift of a shift.
2913 uint64_t ShlImm = 0;
2914 uint64_t TruncBits = 0;
2915 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2916 Opd0 = N->getOperand(0).getOperand(0);
2917 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2918 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2919 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2920 // be considered as setting high 32 bits as zero. Our strategy here is to
2921 // always generate 64bit UBFM. This consistency will help the CSE pass
2922 // later find more redundancy.
2923 Opd0 = N->getOperand(0).getOperand(0);
2924 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2925 VT = Opd0.getValueType();
2926 assert(VT == MVT::i64 && "the promoted type should be i64");
2927 } else if (BiggerPattern) {
2928 // Let's pretend a 0 shift left has been performed.
2929 // FIXME: Currently we limit this to the bigger pattern case,
2930 // because some optimizations expect AND and not UBFM
2931 Opd0 = N->getOperand(0);
2932 } else
2933 return false;
2934
2935 // Missing combines/constant folding may have left us with strange
2936 // constants.
2937 if (ShlImm >= VT.getSizeInBits()) {
2938 LLVM_DEBUG(
2939 (dbgs() << N
2940 << ": Found large shift immediate, this should not happen\n"));
2941 return false;
2942 }
2943
2944 uint64_t SrlImm = 0;
2945 if (!isIntImmediate(N->getOperand(1), SrlImm))
2946 return false;
2947
2948 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2949 "bad amount in shift node!");
2950 int immr = SrlImm - ShlImm;
2951 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2952 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2953 // SRA requires a signed extraction
2954 if (VT == MVT::i32)
2955 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2956 else
2957 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2958 return true;
2959}
2960
2961bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2962 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2963
2964 EVT VT = N->getValueType(0);
2965 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2966 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2967 return false;
2968
2969 uint64_t ShiftImm;
2970 SDValue Op = N->getOperand(0);
2971 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2972 return false;
2973
2974 SDLoc dl(N);
2975 // Extend the incoming operand of the shift to 64-bits.
2976 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2977 unsigned Immr = ShiftImm;
2978 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2979 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2980 CurDAG->getTargetConstant(Imms, dl, VT)};
2981 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2982 return true;
2983}
2984
2985static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2986 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2987 unsigned NumberOfIgnoredLowBits = 0,
2988 bool BiggerPattern = false) {
2989 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2990 return false;
2991
2992 switch (N->getOpcode()) {
2993 default:
2994 if (!N->isMachineOpcode())
2995 return false;
2996 break;
2997 case ISD::AND:
2998 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2999 NumberOfIgnoredLowBits, BiggerPattern);
3000 case ISD::SRL:
3001 case ISD::SRA:
3002 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
3003
3005 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
3006 }
3007
3008 unsigned NOpc = N->getMachineOpcode();
3009 switch (NOpc) {
3010 default:
3011 return false;
3012 case AArch64::SBFMWri:
3013 case AArch64::UBFMWri:
3014 case AArch64::SBFMXri:
3015 case AArch64::UBFMXri:
3016 Opc = NOpc;
3017 Opd0 = N->getOperand(0);
3018 Immr = N->getConstantOperandVal(1);
3019 Imms = N->getConstantOperandVal(2);
3020 return true;
3021 }
3022 // Unreachable
3023 return false;
3024}
3025
3026bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3027 unsigned Opc, Immr, Imms;
3028 SDValue Opd0;
3029 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3030 return false;
3031
3032 EVT VT = N->getValueType(0);
3033 SDLoc dl(N);
3034
3035 // If the bit extract operation is 64bit but the original type is 32bit, we
3036 // need to add one EXTRACT_SUBREG.
3037 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3038 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3039 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3040
3041 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3042 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3043 MVT::i32, SDValue(BFM, 0));
3044 ReplaceNode(N, Inner.getNode());
3045 return true;
3046 }
3047
3048 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3049 CurDAG->getTargetConstant(Imms, dl, VT)};
3050 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3051 return true;
3052}
3053
3054/// Does DstMask form a complementary pair with the mask provided by
3055/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3056/// this asks whether DstMask zeroes precisely those bits that will be set by
3057/// the other half.
3058static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3059 unsigned NumberOfIgnoredHighBits, EVT VT) {
3060 assert((VT == MVT::i32 || VT == MVT::i64) &&
3061 "i32 or i64 mask type expected!");
3062 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3063
3064 // Enable implicitTrunc as we're intentionally ignoring high bits.
3065 APInt SignificantDstMask =
3066 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3067 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3068
3069 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3070 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3071}
3072
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for useful bit of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, x useful bits are 0x7, then the useful bits of x, live through
// y.
// After #2, the useful bits of x are 0x4.
// However, if x is used on an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
3088static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3089
3091 unsigned Depth) {
3092 uint64_t Imm =
3093 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3094 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3095 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3096 getUsefulBits(Op, UsefulBits, Depth + 1);
3097}
3098
3100 uint64_t Imm, uint64_t MSB,
3101 unsigned Depth) {
3102 // inherit the bitwidth value
3103 APInt OpUsefulBits(UsefulBits);
3104 OpUsefulBits = 1;
3105
3106 if (MSB >= Imm) {
3107 OpUsefulBits <<= MSB - Imm + 1;
3108 --OpUsefulBits;
3109 // The interesting part will be in the lower part of the result
3110 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3111 // The interesting part was starting at Imm in the argument
3112 OpUsefulBits <<= Imm;
3113 } else {
3114 OpUsefulBits <<= MSB + 1;
3115 --OpUsefulBits;
3116 // The interesting part will be shifted in the result
3117 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3118 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3119 // The interesting part was at zero in the argument
3120 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3121 }
3122
3123 UsefulBits &= OpUsefulBits;
3124}
3125
3126static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3127 unsigned Depth) {
3128 uint64_t Imm =
3129 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3130 uint64_t MSB =
3131 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3132
3133 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3134}
3135
3137 unsigned Depth) {
3138 uint64_t ShiftTypeAndValue =
3139 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3140 APInt Mask(UsefulBits);
3141 Mask.clearAllBits();
3142 Mask.flipAllBits();
3143
3144 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3145 // Shift Left
3146 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3147 Mask <<= ShiftAmt;
3148 getUsefulBits(Op, Mask, Depth + 1);
3149 Mask.lshrInPlace(ShiftAmt);
3150 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3151 // Shift Right
3152 // We do not handle AArch64_AM::ASR, because the sign will change the
3153 // number of useful bits
3154 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3155 Mask.lshrInPlace(ShiftAmt);
3156 getUsefulBits(Op, Mask, Depth + 1);
3157 Mask <<= ShiftAmt;
3158 } else
3159 return;
3160
3161 UsefulBits &= Mask;
3162}
3163
3164static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3165 unsigned Depth) {
3166 uint64_t Imm =
3167 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3168 uint64_t MSB =
3169 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3170
3171 APInt OpUsefulBits(UsefulBits);
3172 OpUsefulBits = 1;
3173
3174 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3175 ResultUsefulBits.flipAllBits();
3176 APInt Mask(UsefulBits.getBitWidth(), 0);
3177
3178 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3179
3180 if (MSB >= Imm) {
3181 // The instruction is a BFXIL.
3182 uint64_t Width = MSB - Imm + 1;
3183 uint64_t LSB = Imm;
3184
3185 OpUsefulBits <<= Width;
3186 --OpUsefulBits;
3187
3188 if (Op.getOperand(1) == Orig) {
3189 // Copy the low bits from the result to bits starting from LSB.
3190 Mask = ResultUsefulBits & OpUsefulBits;
3191 Mask <<= LSB;
3192 }
3193
3194 if (Op.getOperand(0) == Orig)
3195 // Bits starting from LSB in the input contribute to the result.
3196 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3197 } else {
3198 // The instruction is a BFI.
3199 uint64_t Width = MSB + 1;
3200 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3201
3202 OpUsefulBits <<= Width;
3203 --OpUsefulBits;
3204 OpUsefulBits <<= LSB;
3205
3206 if (Op.getOperand(1) == Orig) {
3207 // Copy the bits from the result to the zero bits.
3208 Mask = ResultUsefulBits & OpUsefulBits;
3209 Mask.lshrInPlace(LSB);
3210 }
3211
3212 if (Op.getOperand(0) == Orig)
3213 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3214 }
3215
3216 UsefulBits &= Mask;
3217}
3218
3219static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3220 SDValue Orig, unsigned Depth) {
3221
3222 // Users of this node should have already been instruction selected
3223 // FIXME: Can we turn that into an assert?
3224 if (!UserNode->isMachineOpcode())
3225 return;
3226
3227 switch (UserNode->getMachineOpcode()) {
3228 default:
3229 return;
3230 case AArch64::ANDSWri:
3231 case AArch64::ANDSXri:
3232 case AArch64::ANDWri:
3233 case AArch64::ANDXri:
3234 // We increment Depth only when we call the getUsefulBits
3235 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3236 Depth);
3237 case AArch64::UBFMWri:
3238 case AArch64::UBFMXri:
3239 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3240
3241 case AArch64::ORRWrs:
3242 case AArch64::ORRXrs:
3243 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3244 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3245 Depth);
3246 return;
3247 case AArch64::BFMWri:
3248 case AArch64::BFMXri:
3249 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3250
3251 case AArch64::STRBBui:
3252 case AArch64::STURBBi:
3253 if (UserNode->getOperand(0) != Orig)
3254 return;
3255 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3256 return;
3257
3258 case AArch64::STRHHui:
3259 case AArch64::STURHHi:
3260 if (UserNode->getOperand(0) != Orig)
3261 return;
3262 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3263 return;
3264 }
3265}
3266
3267static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3269 return;
3270 // Initialize UsefulBits
3271 if (!Depth) {
3272 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3273 // At the beginning, assume every produced bits is useful
3274 UsefulBits = APInt(Bitwidth, 0);
3275 UsefulBits.flipAllBits();
3276 }
3277 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3278
3279 for (SDNode *Node : Op.getNode()->users()) {
3280 // A use cannot produce useful bits
3281 APInt UsefulBitsForUse = APInt(UsefulBits);
3282 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3283 UsersUsefulBits |= UsefulBitsForUse;
3284 }
3285 // UsefulBits contains the produced bits that are meaningful for the
3286 // current definition, thus a user cannot make a bit meaningful at
3287 // this point
3288 UsefulBits &= UsersUsefulBits;
3289}
3290
3291/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3292/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3293/// 0, return Op unchanged.
3294static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3295 if (ShlAmount == 0)
3296 return Op;
3297
3298 EVT VT = Op.getValueType();
3299 SDLoc dl(Op);
3300 unsigned BitWidth = VT.getSizeInBits();
3301 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3302
3303 SDNode *ShiftNode;
3304 if (ShlAmount > 0) {
3305 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3306 ShiftNode = CurDAG->getMachineNode(
3307 UBFMOpc, dl, VT, Op,
3308 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3309 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3310 } else {
3311 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3312 assert(ShlAmount < 0 && "expected right shift");
3313 int ShrAmount = -ShlAmount;
3314 ShiftNode = CurDAG->getMachineNode(
3315 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3316 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3317 }
3318
3319 return SDValue(ShiftNode, 0);
3320}
3321
3322// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3323static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3324 bool BiggerPattern,
3325 const uint64_t NonZeroBits,
3326 SDValue &Src, int &DstLSB,
3327 int &Width);
3328
// For bit-field-positioning pattern "(shl VAL, N)".
3330static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3331 bool BiggerPattern,
3332 const uint64_t NonZeroBits,
3333 SDValue &Src, int &DstLSB,
3334 int &Width);
3335
3336/// Does this tree qualify as an attempt to move a bitfield into position,
3337/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3339 bool BiggerPattern, SDValue &Src,
3340 int &DstLSB, int &Width) {
3341 EVT VT = Op.getValueType();
3342 unsigned BitWidth = VT.getSizeInBits();
3343 (void)BitWidth;
3344 assert(BitWidth == 32 || BitWidth == 64);
3345
3346 KnownBits Known = CurDAG->computeKnownBits(Op);
3347
3348 // Non-zero in the sense that they're not provably zero, which is the key
3349 // point if we want to use this value
3350 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3351 if (!isShiftedMask_64(NonZeroBits))
3352 return false;
3353
3354 switch (Op.getOpcode()) {
3355 default:
3356 break;
3357 case ISD::AND:
3358 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3359 NonZeroBits, Src, DstLSB, Width);
3360 case ISD::SHL:
3361 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3362 NonZeroBits, Src, DstLSB, Width);
3363 }
3364
3365 return false;
3366}
3367
3369 bool BiggerPattern,
3370 const uint64_t NonZeroBits,
3371 SDValue &Src, int &DstLSB,
3372 int &Width) {
3373 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3374
3375 EVT VT = Op.getValueType();
3376 assert((VT == MVT::i32 || VT == MVT::i64) &&
3377 "Caller guarantees VT is one of i32 or i64");
3378 (void)VT;
3379
3380 uint64_t AndImm;
3381 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3382 return false;
3383
3384 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3385 // 1) (AndImm & (1 << POS) == 0)
3386 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3387 //
3388 // 1) and 2) don't agree so something must be wrong (e.g., in
3389 // 'SelectionDAG::computeKnownBits')
3390 assert((~AndImm & NonZeroBits) == 0 &&
3391 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3392
3393 SDValue AndOp0 = Op.getOperand(0);
3394
3395 uint64_t ShlImm;
3396 SDValue ShlOp0;
3397 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3398 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3399 ShlOp0 = AndOp0.getOperand(0);
3400 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3402 ShlImm)) {
3403 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3404
3405 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3406 SDValue ShlVal = AndOp0.getOperand(0);
3407
3408 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3409 // expect VT to be MVT::i32.
3410 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3411
3412 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3413 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3414 } else
3415 return false;
3416
3417 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3418 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3419 // AndOp0+AND.
3420 if (!BiggerPattern && !AndOp0.hasOneUse())
3421 return false;
3422
3423 DstLSB = llvm::countr_zero(NonZeroBits);
3424 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3425
3426 // Bail out on large Width. This happens when no proper combining / constant
3427 // folding was performed.
3428 if (Width >= (int)VT.getSizeInBits()) {
3429 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3430 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3431 // "val".
3432 // If VT is i32, what Width >= 32 means:
3433 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3434 // demands at least 'Width' bits (after dag-combiner). This together with
3435 // `any_extend` Op (undefined higher bits) indicates missed combination
3436 // when lowering the 'and' IR instruction to an machine IR instruction.
3437 LLVM_DEBUG(
3438 dbgs()
3439 << "Found large Width in bit-field-positioning -- this indicates no "
3440 "proper combining / constant folding was performed\n");
3441 return false;
3442 }
3443
3444 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3445 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3446 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3447 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3448 // which case it is not profitable to insert an extra shift.
3449 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3450 return false;
3451
3452 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3453 return true;
3454}
3455
3456// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3457// UBFIZ.
3459 SDValue &Src, int &DstLSB,
3460 int &Width) {
3461 // Caller should have verified that N is a left shift with constant shift
3462 // amount; asserts that.
3463 assert(Op.getOpcode() == ISD::SHL &&
3464 "Op.getNode() should be a SHL node to call this function");
3465 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3466 "Op.getNode() should shift ShlImm to call this function");
3467
3468 uint64_t AndImm = 0;
3469 SDValue Op0 = Op.getOperand(0);
3470 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3471 return false;
3472
3473 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3474 if (isMask_64(ShiftedAndImm)) {
3475 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3476 // should end with Mask, and could be prefixed with random bits if those
3477 // bits are shifted out.
3478 //
3479 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3480 // the AND result corresponding to those bits are shifted out, so it's fine
3481 // to not extract them.
3482 Width = llvm::countr_one(ShiftedAndImm);
3483 DstLSB = ShlImm;
3484 Src = Op0.getOperand(0);
3485 return true;
3486 }
3487 return false;
3488}
3489
3491 bool BiggerPattern,
3492 const uint64_t NonZeroBits,
3493 SDValue &Src, int &DstLSB,
3494 int &Width) {
3495 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3496
3497 EVT VT = Op.getValueType();
3498 assert((VT == MVT::i32 || VT == MVT::i64) &&
3499 "Caller guarantees that type is i32 or i64");
3500 (void)VT;
3501
3502 uint64_t ShlImm;
3503 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3504 return false;
3505
3506 if (!BiggerPattern && !Op.hasOneUse())
3507 return false;
3508
3509 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3510 return true;
3511
3512 DstLSB = llvm::countr_zero(NonZeroBits);
3513 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3514
3515 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3516 return false;
3517
3518 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3519 return true;
3520}
3521
3522static bool isShiftedMask(uint64_t Mask, EVT VT) {
3523 assert(VT == MVT::i32 || VT == MVT::i64);
3524 if (VT == MVT::i32)
3525 return isShiftedMask_32(Mask);
3526 return isShiftedMask_64(Mask);
3527}
3528
3529// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3530// inserted only sets known zero bits.
3532 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3533
3534 EVT VT = N->getValueType(0);
3535 if (VT != MVT::i32 && VT != MVT::i64)
3536 return false;
3537
3538 unsigned BitWidth = VT.getSizeInBits();
3539
3540 uint64_t OrImm;
3541 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3542 return false;
3543
3544 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3545 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3546 // performance neutral.
3548 return false;
3549
3550 uint64_t MaskImm;
3551 SDValue And = N->getOperand(0);
3552 // Must be a single use AND with an immediate operand.
3553 if (!And.hasOneUse() ||
3554 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3555 return false;
3556
3557 // Compute the Known Zero for the AND as this allows us to catch more general
3558 // cases than just looking for AND with imm.
3559 KnownBits Known = CurDAG->computeKnownBits(And);
3560
3561 // Non-zero in the sense that they're not provably zero, which is the key
3562 // point if we want to use this value.
3563 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3564
3565 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3566 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3567 return false;
3568
3569 // The bits being inserted must only set those bits that are known to be zero.
3570 if ((OrImm & NotKnownZero) != 0) {
3571 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3572 // currently handle this case.
3573 return false;
3574 }
3575
3576 // BFI/BFXIL dst, src, #lsb, #width.
3577 int LSB = llvm::countr_one(NotKnownZero);
3578 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3579
3580 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3581 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3582 unsigned ImmS = Width - 1;
3583
3584 // If we're creating a BFI instruction avoid cases where we need more
3585 // instructions to materialize the BFI constant as compared to the original
3586 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3587 // should be no worse in this case.
3588 bool IsBFI = LSB != 0;
3589 uint64_t BFIImm = OrImm >> LSB;
3590 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3591 // We have a BFI instruction and we know the constant can't be materialized
3592 // with a ORR-immediate with the zero register.
3593 unsigned OrChunks = 0, BFIChunks = 0;
3594 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3595 if (((OrImm >> Shift) & 0xFFFF) != 0)
3596 ++OrChunks;
3597 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3598 ++BFIChunks;
3599 }
3600 if (BFIChunks > OrChunks)
3601 return false;
3602 }
3603
3604 // Materialize the constant to be inserted.
3605 SDLoc DL(N);
3606 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3607 SDNode *MOVI = CurDAG->getMachineNode(
3608 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3609
3610 // Create the BFI/BFXIL instruction.
3611 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3612 CurDAG->getTargetConstant(ImmR, DL, VT),
3613 CurDAG->getTargetConstant(ImmS, DL, VT)};
3614 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3615 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3616 return true;
3617}
3618
3620 SDValue &ShiftedOperand,
3621 uint64_t &EncodedShiftImm) {
3622 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3623 if (!Dst.hasOneUse())
3624 return false;
3625
3626 EVT VT = Dst.getValueType();
3627 assert((VT == MVT::i32 || VT == MVT::i64) &&
3628 "Caller should guarantee that VT is one of i32 or i64");
3629 const unsigned SizeInBits = VT.getSizeInBits();
3630
3631 SDLoc DL(Dst.getNode());
3632 uint64_t AndImm, ShlImm;
3633 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3634 isShiftedMask_64(AndImm)) {
3635 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3636 SDValue DstOp0 = Dst.getOperand(0);
3637 if (!DstOp0.hasOneUse())
3638 return false;
3639
3640 // An example to illustrate the transformation
3641 // From:
3642 // lsr x8, x1, #1
3643 // and x8, x8, #0x3f80
3644 // bfxil x8, x1, #0, #7
3645 // To:
3646 // and x8, x23, #0x7f
3647 // ubfx x9, x23, #8, #7
3648 // orr x23, x8, x9, lsl #7
3649 //
3650 // The number of instructions remains the same, but ORR is faster than BFXIL
3651 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3652 // the dependency chain is improved after the transformation.
3653 uint64_t SrlImm;
3654 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3655 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3656 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3657 unsigned MaskWidth =
3658 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3659 unsigned UBFMOpc =
3660 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3661 SDNode *UBFMNode = CurDAG->getMachineNode(
3662 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3663 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3664 VT),
3665 CurDAG->getTargetConstant(
3666 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3667 ShiftedOperand = SDValue(UBFMNode, 0);
3668 EncodedShiftImm = AArch64_AM::getShifterImm(
3669 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3670 return true;
3671 }
3672 }
3673 return false;
3674 }
3675
3676 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3677 ShiftedOperand = Dst.getOperand(0);
3678 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3679 return true;
3680 }
3681
3682 uint64_t SrlImm;
3683 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3684 ShiftedOperand = Dst.getOperand(0);
3685 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3686 return true;
3687 }
3688 return false;
3689}
3690
3691// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3692// the operands and select it to AArch64::ORR with shifted registers if
3693// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3694static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3695 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3696 const bool BiggerPattern) {
3697 EVT VT = N->getValueType(0);
3698 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3699 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3700 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3701 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3702 assert((VT == MVT::i32 || VT == MVT::i64) &&
3703 "Expect result type to be i32 or i64 since N is combinable to BFM");
3704 SDLoc DL(N);
3705
3706 // Bail out if BFM simplifies away one node in BFM Dst.
3707 if (OrOpd1 != Dst)
3708 return false;
3709
3710 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3711 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3712 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3713 if (BiggerPattern) {
3714 uint64_t SrcAndImm;
3715 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3716 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3717 // OrOpd0 = AND Src, #Mask
3718 // So BFM simplifies away one AND node from Src and doesn't simplify away
3719 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3720 // one node (from Rd), ORR is better since it has higher throughput and
3721 // smaller latency than BFM on many AArch64 processors (and for the rest
3722 // ORR is at least as good as BFM).
3723 SDValue ShiftedOperand;
3724 uint64_t EncodedShiftImm;
3725 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3726 EncodedShiftImm)) {
3727 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3728 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3729 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3730 return true;
3731 }
3732 }
3733 return false;
3734 }
3735
3736 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3737
3738 uint64_t ShlImm;
3739 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3740 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3741 SDValue Ops[] = {
3742 Dst, Src,
3743 CurDAG->getTargetConstant(
3745 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3746 return true;
3747 }
3748
3749 // Select the following pattern to left-shifted operand rather than BFI.
3750 // %val1 = op ..
3751 // %val2 = shl %val1, #imm
3752 // %res = or %val1, %val2
3753 //
3754 // If N is selected to be BFI, we know that
3755 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3756 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3757 //
3758 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3759 if (OrOpd0.getOperand(0) == OrOpd1) {
3760 SDValue Ops[] = {
3761 OrOpd1, OrOpd1,
3762 CurDAG->getTargetConstant(
3764 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3765 return true;
3766 }
3767 }
3768
3769 uint64_t SrlImm;
3770 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3771 // Select the following pattern to right-shifted operand rather than BFXIL.
3772 // %val1 = op ..
3773 // %val2 = lshr %val1, #imm
3774 // %res = or %val1, %val2
3775 //
3776 // If N is selected to be BFXIL, we know that
3777 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3778 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3779 //
3780 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3781 if (OrOpd0.getOperand(0) == OrOpd1) {
3782 SDValue Ops[] = {
3783 OrOpd1, OrOpd1,
3784 CurDAG->getTargetConstant(
3786 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3787 return true;
3788 }
3789 }
3790
3791 return false;
3792}
3793
3794static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3795 SelectionDAG *CurDAG) {
3796 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3797
3798 EVT VT = N->getValueType(0);
3799 if (VT != MVT::i32 && VT != MVT::i64)
3800 return false;
3801
3802 unsigned BitWidth = VT.getSizeInBits();
3803
3804 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3805 // have the expected shape. Try to undo that.
3806
3807 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3808 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3809
3810 // Given a OR operation, check if we have the following pattern
3811 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3812 // isBitfieldExtractOp)
3813 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3814 // countTrailingZeros(mask2) == imm2 - imm + 1
3815 // f = d | c
3816 // if yes, replace the OR instruction with:
3817 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3818
3819 // OR is commutative, check all combinations of operand order and values of
3820 // BiggerPattern, i.e.
3821 // Opd0, Opd1, BiggerPattern=false
3822 // Opd1, Opd0, BiggerPattern=false
3823 // Opd0, Opd1, BiggerPattern=true
3824 // Opd1, Opd0, BiggerPattern=true
3825 // Several of these combinations may match, so check with BiggerPattern=false
3826 // first since that will produce better results by matching more instructions
3827 // and/or inserting fewer extra instructions.
3828 for (int I = 0; I < 4; ++I) {
3829
3830 SDValue Dst, Src;
3831 unsigned ImmR, ImmS;
3832 bool BiggerPattern = I / 2;
3833 SDValue OrOpd0Val = N->getOperand(I % 2);
3834 SDNode *OrOpd0 = OrOpd0Val.getNode();
3835 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3836 SDNode *OrOpd1 = OrOpd1Val.getNode();
3837
3838 unsigned BFXOpc;
3839 int DstLSB, Width;
3840 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3841 NumberOfIgnoredLowBits, BiggerPattern)) {
3842 // Check that the returned opcode is compatible with the pattern,
3843 // i.e., same type and zero extended (U and not S)
3844 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3845 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3846 continue;
3847
3848 // Compute the width of the bitfield insertion
3849 DstLSB = 0;
3850 Width = ImmS - ImmR + 1;
3851 // FIXME: This constraint is to catch bitfield insertion we may
3852 // want to widen the pattern if we want to grab general bitfield
3853 // move case
3854 if (Width <= 0)
3855 continue;
3856
3857 // If the mask on the insertee is correct, we have a BFXIL operation. We
3858 // can share the ImmR and ImmS values from the already-computed UBFM.
3859 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3860 BiggerPattern,
3861 Src, DstLSB, Width)) {
3862 ImmR = (BitWidth - DstLSB) % BitWidth;
3863 ImmS = Width - 1;
3864 } else
3865 continue;
3866
3867 // Check the second part of the pattern
3868 EVT VT = OrOpd1Val.getValueType();
3869 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3870
3871 // Compute the Known Zero for the candidate of the first operand.
3872 // This allows to catch more general case than just looking for
3873 // AND with imm. Indeed, simplify-demanded-bits may have removed
3874 // the AND instruction because it proves it was useless.
3875 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3876
3877 // Check if there is enough room for the second operand to appear
3878 // in the first one
3879 APInt BitsToBeInserted =
3880 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3881
3882 if ((BitsToBeInserted & ~Known.Zero) != 0)
3883 continue;
3884
3885 // Set the first operand
3886 uint64_t Imm;
3887 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3888 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3889 // In that case, we can eliminate the AND
3890 Dst = OrOpd1->getOperand(0);
3891 else
3892 // Maybe the AND has been removed by simplify-demanded-bits
3893 // or is useful because it discards more bits
3894 Dst = OrOpd1Val;
3895
3896 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3897 // with shifted operand is more efficient.
3898 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3899 BiggerPattern))
3900 return true;
3901
3902 // both parts match
3903 SDLoc DL(N);
3904 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3905 CurDAG->getTargetConstant(ImmS, DL, VT)};
3906 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3907 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3908 return true;
3909 }
3910
3911 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3912 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3913 // mask (e.g., 0x000ffff0).
3914 uint64_t Mask0Imm, Mask1Imm;
3915 SDValue And0 = N->getOperand(0);
3916 SDValue And1 = N->getOperand(1);
3917 if (And0.hasOneUse() && And1.hasOneUse() &&
3918 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3919 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3920 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3921 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3922
3923 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3924 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3925 // bits to be inserted.
3926 if (isShiftedMask(Mask0Imm, VT)) {
3927 std::swap(And0, And1);
3928 std::swap(Mask0Imm, Mask1Imm);
3929 }
3930
3931 SDValue Src = And1->getOperand(0);
3932 SDValue Dst = And0->getOperand(0);
3933 unsigned LSB = llvm::countr_zero(Mask1Imm);
3934 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3935
3936 // The BFXIL inserts the low-order bits from a source register, so right
3937 // shift the needed bits into place.
3938 SDLoc DL(N);
3939 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3940 uint64_t LsrImm = LSB;
3941 if (Src->hasOneUse() &&
3942 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3943 (LsrImm + LSB) < BitWidth) {
3944 Src = Src->getOperand(0);
3945 LsrImm += LSB;
3946 }
3947
3948 SDNode *LSR = CurDAG->getMachineNode(
3949 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3950 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3951
3952 // BFXIL is an alias of BFM, so translate to BFM operands.
3953 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3954 unsigned ImmS = Width - 1;
3955
3956 // Create the BFXIL instruction.
3957 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3958 CurDAG->getTargetConstant(ImmR, DL, VT),
3959 CurDAG->getTargetConstant(ImmS, DL, VT)};
3960 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3961 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3962 return true;
3963 }
3964
3965 return false;
3966}
3967
3968bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3969 if (N->getOpcode() != ISD::OR)
3970 return false;
3971
3972 APInt NUsefulBits;
3973 getUsefulBits(SDValue(N, 0), NUsefulBits);
3974
3975 // If all bits are not useful, just return UNDEF.
3976 if (!NUsefulBits) {
3977 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3978 return true;
3979 }
3980
3981 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3982 return true;
3983
3984 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3985}
3986
3987/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3988/// equivalent of a left shift by a constant amount followed by an and masking
3989/// out a contiguous set of bits.
3990bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3991 if (N->getOpcode() != ISD::AND)
3992 return false;
3993
3994 EVT VT = N->getValueType(0);
3995 if (VT != MVT::i32 && VT != MVT::i64)
3996 return false;
3997
3998 SDValue Op0;
3999 int DstLSB, Width;
4000 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
4001 Op0, DstLSB, Width))
4002 return false;
4003
4004 // ImmR is the rotate right amount.
4005 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
4006 // ImmS is the most significant bit of the source to be moved.
4007 unsigned ImmS = Width - 1;
4008
4009 SDLoc DL(N);
4010 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
4011 CurDAG->getTargetConstant(ImmS, DL, VT)};
4012 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
4013 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4014 return true;
4015}
4016
4017/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
4018/// variable shift/rotate instructions.
4019bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4020 EVT VT = N->getValueType(0);
4021
4022 unsigned Opc;
4023 switch (N->getOpcode()) {
4024 case ISD::ROTR:
4025 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
4026 break;
4027 case ISD::SHL:
4028 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
4029 break;
4030 case ISD::SRL:
4031 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
4032 break;
4033 case ISD::SRA:
4034 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4035 break;
4036 default:
4037 return false;
4038 }
4039
4040 uint64_t Size;
4041 uint64_t Bits;
4042 if (VT == MVT::i32) {
4043 Bits = 5;
4044 Size = 32;
4045 } else if (VT == MVT::i64) {
4046 Bits = 6;
4047 Size = 64;
4048 } else
4049 return false;
4050
4051 SDValue ShiftAmt = N->getOperand(1);
4052 SDLoc DL(N);
4053 SDValue NewShiftAmt;
4054
4055 // Skip over an extend of the shift amount.
4056 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4057 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4058 ShiftAmt = ShiftAmt->getOperand(0);
4059
4060 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4061 SDValue Add0 = ShiftAmt->getOperand(0);
4062 SDValue Add1 = ShiftAmt->getOperand(1);
4063 uint64_t Add0Imm;
4064 uint64_t Add1Imm;
4065 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4066 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4067 // to avoid the ADD/SUB.
4068 NewShiftAmt = Add0;
4069 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4070 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4071 (Add0Imm % Size == 0)) {
4072 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4073 // to generate a NEG instead of a SUB from a constant.
4074 unsigned NegOpc;
4075 unsigned ZeroReg;
4076 EVT SubVT = ShiftAmt->getValueType(0);
4077 if (SubVT == MVT::i32) {
4078 NegOpc = AArch64::SUBWrr;
4079 ZeroReg = AArch64::WZR;
4080 } else {
4081 assert(SubVT == MVT::i64);
4082 NegOpc = AArch64::SUBXrr;
4083 ZeroReg = AArch64::XZR;
4084 }
4085 SDValue Zero =
4086 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4087 MachineSDNode *Neg =
4088 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4089 NewShiftAmt = SDValue(Neg, 0);
4090 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4091 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4092 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4093 // to generate a NOT instead of a SUB from a constant.
4094 unsigned NotOpc;
4095 unsigned ZeroReg;
4096 EVT SubVT = ShiftAmt->getValueType(0);
4097 if (SubVT == MVT::i32) {
4098 NotOpc = AArch64::ORNWrr;
4099 ZeroReg = AArch64::WZR;
4100 } else {
4101 assert(SubVT == MVT::i64);
4102 NotOpc = AArch64::ORNXrr;
4103 ZeroReg = AArch64::XZR;
4104 }
4105 SDValue Zero =
4106 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4107 MachineSDNode *Not =
4108 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4109 NewShiftAmt = SDValue(Not, 0);
4110 } else
4111 return false;
4112 } else {
4113 // If the shift amount is masked with an AND, check that the mask covers the
4114 // bits that are implicitly ANDed off by the above opcodes and if so, skip
4115 // the AND.
4116 uint64_t MaskImm;
4117 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4118 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4119 return false;
4120
4121 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4122 return false;
4123
4124 NewShiftAmt = ShiftAmt->getOperand(0);
4125 }
4126
4127 // Narrow/widen the shift amount to match the size of the shift operation.
4128 if (VT == MVT::i32)
4129 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4130 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4131 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4132 MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4133 NewShiftAmt, SubReg);
4134 NewShiftAmt = SDValue(Ext, 0);
4135 }
4136
4137 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4138 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4139 return true;
4140}
4141
4143 SDValue &FixedPos,
4144 unsigned RegWidth,
4145 bool isReciprocal) {
4146 APFloat FVal(0.0);
4148 FVal = CN->getValueAPF();
4149 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4150 // Some otherwise illegal constants are allowed in this case.
4151 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4152 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4153 return false;
4154
4155 ConstantPoolSDNode *CN =
4156 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4157 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4158 } else
4159 return false;
4160
4161 if (unsigned FBits =
4162 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4163 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4164 return true;
4165 }
4166
4167 return false;
4168}
4169
4171 SDValue N,
4172 SDValue &FixedPos,
4173 unsigned RegWidth,
4174 bool isReciprocal) {
4175 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4176 N.getValueType().getScalarSizeInBits() ==
4177 N.getOperand(0).getValueType().getScalarSizeInBits())
4178 N = N.getOperand(0);
4179
4180 auto ImmToFloat = [RegWidth](APInt Imm) {
4181 switch (RegWidth) {
4182 case 16:
4183 return APFloat(APFloat::IEEEhalf(), Imm);
4184 case 32:
4185 return APFloat(APFloat::IEEEsingle(), Imm);
4186 case 64:
4187 return APFloat(APFloat::IEEEdouble(), Imm);
4188 default:
4189 llvm_unreachable("Unexpected RegWidth!");
4190 };
4191 };
4192
4193 APFloat FVal(0.0);
4194 switch (N->getOpcode()) {
4195 case AArch64ISD::MOVIshift:
4196 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4197 << N.getConstantOperandVal(1)));
4198 break;
4199 case AArch64ISD::FMOV:
4200 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4201 break;
4202 case AArch64ISD::DUP:
4203 if (isa<ConstantSDNode>(N.getOperand(0)))
4204 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4205 else
4206 return false;
4207 break;
4208 default:
4209 return false;
4210 }
4211
4212 if (unsigned FBits =
4213 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4214 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4215 return true;
4216 }
4217
4218 return false;
4219}
4220
4221bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4222 unsigned RegWidth) {
4223 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4224 /*isReciprocal*/ false);
4225}
4226
4227bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4228 unsigned RegWidth) {
4230 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4231}
4232
4233bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4234 SDValue &FixedPos,
4235 unsigned RegWidth) {
4237 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4238}
4239
4240bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4241 SDValue &FixedPos,
4242 unsigned RegWidth) {
4243 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4244 /*isReciprocal*/ true);
4245}
4246
4247// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4248// of the string and obtains the integer values from them and combines these
4249// into a single value to be used in the MRS/MSR instruction.
4252 RegString.split(Fields, ':');
4253
4254 if (Fields.size() == 1)
4255 return -1;
4256
4257 assert(Fields.size() == 5
4258 && "Invalid number of fields in read register string");
4259
4261 bool AllIntFields = true;
4262
4263 for (StringRef Field : Fields) {
4264 unsigned IntField;
4265 AllIntFields &= !Field.getAsInteger(10, IntField);
4266 Ops.push_back(IntField);
4267 }
4268
4269 assert(AllIntFields &&
4270 "Unexpected non-integer value in special register string.");
4271 (void)AllIntFields;
4272
4273 // Need to combine the integer fields of the string into a single value
4274 // based on the bit encoding of MRS/MSR instruction.
4275 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | (Ops[3] << 3) |
4276 (Ops[4]);
4277}
4278
4279// Lower the read_register intrinsic to an MRS instruction node if the special
4280// register string argument is either of the form detailed in the ALCE (the
4281// form described in getIntOperandsFromRegisterString) or is a named register
4282// known by the MRS SysReg mapper.
4283bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4284 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4285 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4286 SDLoc DL(N);
4287
4288 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4289
4290 unsigned Opcode64Bit = AArch64::MRS;
4291 int Imm = getIntOperandFromRegisterString(RegString->getString());
4292 if (Imm == -1) {
4293 // No match, Use the sysreg mapper to map the remaining possible strings to
4294 // the value for the register to be used for the instruction operand.
4295 const auto *TheReg =
4296 AArch64SysReg::lookupSysRegByName(RegString->getString());
4297 if (TheReg && TheReg->Readable &&
4298 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4299 Imm = TheReg->Encoding;
4300 else
4301 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4302
4303 if (Imm == -1) {
4304 // Still no match, see if this is "pc" or give up.
4305 if (!ReadIs128Bit && RegString->getString() == "pc") {
4306 Opcode64Bit = AArch64::ADR;
4307 Imm = 0;
4308 } else {
4309 // Not a system register. It may name an allocatable 64-bit GPR/FPR read
4310 // by the MSVC __getReg/__getRegFp intrinsics. Emit a pseudo that
4311 // carries the source register as an immediate so the read does not
4312 // reference an undefined physical register (which the machine verifier
4313 // rejects); the AsmPrinter materializes the real mov/fmov.
4314 Register PReg = Subtarget->getTargetLowering()->matchRegisterName(
4315 RegString->getString());
4316 unsigned PseudoOp = 0;
4317 if (AArch64::GPR64RegClass.contains(PReg))
4318 PseudoOp = AArch64::READ_REGISTER_GPR64;
4319 else if (AArch64::FPR64RegClass.contains(PReg))
4320 PseudoOp = AArch64::READ_REGISTER_FPR64;
4321 if (!ReadIs128Bit && PseudoOp && N->getValueType(0) == MVT::i64) {
4322 CurDAG->SelectNodeTo(N, PseudoOp, MVT::i64, MVT::Other,
4323 {CurDAG->getTargetConstant(PReg, DL, MVT::i32),
4324 N->getOperand(0)});
4325 return true;
4326 }
4327 return false;
4328 }
4329 }
4330 }
4331
4332 SDValue InChain = N->getOperand(0);
4333 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4334 if (!ReadIs128Bit) {
4335 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4336 {SysRegImm, InChain});
4337 } else {
4338 SDNode *MRRS = CurDAG->getMachineNode(
4339 AArch64::MRRS, DL,
4340 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4341 {SysRegImm, InChain});
4342
4343 // Sysregs are not endian. The even register always contains the low half
4344 // of the register.
4345 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4346 SDValue(MRRS, 0));
4347 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4348 SDValue(MRRS, 0));
4349 SDValue OutChain = SDValue(MRRS, 1);
4350
4351 ReplaceUses(SDValue(N, 0), Lo);
4352 ReplaceUses(SDValue(N, 1), Hi);
4353 ReplaceUses(SDValue(N, 2), OutChain);
4354 };
4355 return true;
4356}
4357
4358// Lower the write_register intrinsic to an MSR instruction node if the special
4359// register string argument is either of the form detailed in the ALCE (the
4360// form described in getIntOperandsFromRegisterString) or is a named register
4361// known by the MSR SysReg mapper.
4362bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4363 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4364 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4365 SDLoc DL(N);
4366
4367 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4368
4369 if (!WriteIs128Bit) {
4370 // Check if the register was one of those allowed as the pstatefield value
4371 // in the MSR (immediate) instruction. To accept the values allowed in the
4372 // pstatefield for the MSR (immediate) instruction, we also require that an
4373 // immediate value has been provided as an argument, we know that this is
4374 // the case as it has been ensured by semantic checking.
4375 auto trySelectPState = [&](auto PMapper, unsigned State) {
4376 if (PMapper) {
4377 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4378 "Expected a constant integer expression.");
4379 unsigned Reg = PMapper->Encoding;
4380 uint64_t Immed = N->getConstantOperandVal(2);
4381 CurDAG->SelectNodeTo(
4382 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4383 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4384 return true;
4385 }
4386 return false;
4387 };
4388
4389 if (trySelectPState(
4390 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4391 AArch64::MSRpstateImm4))
4392 return true;
4393 if (trySelectPState(
4394 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4395 AArch64::MSRpstateImm1))
4396 return true;
4397 }
4398
4399 int Imm = getIntOperandFromRegisterString(RegString->getString());
4400 if (Imm == -1) {
4401 // Use the sysreg mapper to attempt to map the remaining possible strings
4402 // to the value for the register to be used for the MSR (register)
4403 // instruction operand.
4404 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4405 if (TheReg && TheReg->Writeable &&
4406 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4407 Imm = TheReg->Encoding;
4408 else
4409 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4410
4411 if (Imm == -1) {
4412 // Used by the MSVC __setReg/__setRegFp intrinsics. Copy the value into
4413 // the physical register and keep it live with a FAKE_USE so the write is
4414 // not dead-eliminated. (getRegisterByName rejects allocatable registers,
4415 // so the generic write path cannot handle these.)
4416 Register PReg = Subtarget->getTargetLowering()->matchRegisterName(
4417 RegString->getString());
4418 bool IsGPR = AArch64::GPR64RegClass.contains(PReg);
4419 bool IsFPR = AArch64::FPR64RegClass.contains(PReg);
4420 if (!WriteIs128Bit && (IsGPR || IsFPR) &&
4421 N->getOperand(2).getValueType() == MVT::i64) {
4422 SDValue Copy =
4423 CurDAG->getCopyToReg(N->getOperand(0), DL, PReg, N->getOperand(2));
4424 SDValue RegOp = CurDAG->getRegister(PReg, MVT::i64);
4425 SDNode *FakeUse = CurDAG->getMachineNode(TargetOpcode::FAKE_USE, DL,
4426 MVT::Other, {RegOp, Copy});
4427 ReplaceUses(SDValue(N, 0), SDValue(FakeUse, 0));
4428 CurDAG->RemoveDeadNode(N);
4429 return true;
4430 }
4431 return false;
4432 }
4433 }
4434
4435 SDValue InChain = N->getOperand(0);
4436 if (!WriteIs128Bit) {
4437 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4438 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4439 N->getOperand(2), InChain);
4440 } else {
4441 // No endian swap. The lower half always goes into the even subreg, and the
4442 // higher half always into the odd supreg.
4443 SDNode *Pair = CurDAG->getMachineNode(
4444 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4445 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4446 MVT::i32),
4447 N->getOperand(2),
4448 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4449 N->getOperand(3),
4450 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4451
4452 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4453 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4454 SDValue(Pair, 0), InChain);
4455 }
4456
4457 return true;
4458}
4459
4460/// We've got special pseudo-instructions for these
4461bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4462 unsigned Opcode;
4463 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4464
4465 // Leave IR for LSE if subtarget supports it.
4466 if (Subtarget->hasLSE()) return false;
4467
4468 if (MemTy == MVT::i8)
4469 Opcode = AArch64::CMP_SWAP_8;
4470 else if (MemTy == MVT::i16)
4471 Opcode = AArch64::CMP_SWAP_16;
4472 else if (MemTy == MVT::i32)
4473 Opcode = AArch64::CMP_SWAP_32;
4474 else if (MemTy == MVT::i64)
4475 Opcode = AArch64::CMP_SWAP_64;
4476 else
4477 llvm_unreachable("Unknown AtomicCmpSwap type");
4478
4479 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4480 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4481 N->getOperand(0)};
4482 SDNode *CmpSwap = CurDAG->getMachineNode(
4483 Opcode, SDLoc(N),
4484 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4485
4486 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4487 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4488
4489 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4490 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4491 CurDAG->RemoveDeadNode(N);
4492
4493 return true;
4494}
4495
4496bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4497 SDValue &Shift, bool Negate) {
4498 if (!isa<ConstantSDNode>(N))
4499 return false;
4500
4501 APInt Val =
4502 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4503
4504 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4505}
4506
4507bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4508 SDValue &Imm, SDValue &Shift,
4509 bool Negate) {
4510 if (Negate)
4511 Val = -Val;
4512
4513 switch (VT.SimpleTy) {
4514 case MVT::i8:
4515 // All immediates are supported.
4516 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4517 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4518 return true;
4519 case MVT::i16:
4520 case MVT::i32:
4521 case MVT::i64:
4522 // Support 8bit unsigned immediates.
4523 if ((Val & ~0xff) == 0) {
4524 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4525 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4526 return true;
4527 }
4528 // Support 16bit unsigned immediates that are a multiple of 256.
4529 if ((Val & ~0xff00) == 0) {
4530 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4531 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4532 return true;
4533 }
4534 break;
4535 default:
4536 break;
4537 }
4538
4539 return false;
4540}
4541
4542bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4543 SDValue &Imm, SDValue &Shift,
4544 bool Negate) {
4545 if (!isa<ConstantSDNode>(N))
4546 return false;
4547
4548 SDLoc DL(N);
4549 int64_t Val = cast<ConstantSDNode>(N)
4550 ->getAPIntValue()
4552 .getSExtValue();
4553
4554 if (Negate)
4555 Val = -Val;
4556
4557 // Signed saturating instructions treat their immediate operand as unsigned,
4558 // whereas the related intrinsics define their operands to be signed. This
4559 // means we can only use the immediate form when the operand is non-negative.
4560 if (Val < 0)
4561 return false;
4562
4563 switch (VT.SimpleTy) {
4564 case MVT::i8:
4565 // All positive immediates are supported.
4566 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4567 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4568 return true;
4569 case MVT::i16:
4570 case MVT::i32:
4571 case MVT::i64:
4572 // Support 8bit positive immediates.
4573 if (Val <= 255) {
4574 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4575 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4576 return true;
4577 }
4578 // Support 16bit positive immediates that are a multiple of 256.
4579 if (Val <= 65280 && Val % 256 == 0) {
4580 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4581 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4582 return true;
4583 }
4584 break;
4585 default:
4586 break;
4587 }
4588
4589 return false;
4590}
4591
4592bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4593 SDValue &Shift) {
4594 if (!isa<ConstantSDNode>(N))
4595 return false;
4596
4597 SDLoc DL(N);
4598 int64_t Val = cast<ConstantSDNode>(N)
4599 ->getAPIntValue()
4600 .trunc(VT.getFixedSizeInBits())
4601 .getSExtValue();
4602 int32_t ImmVal, ShiftVal;
4603 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4604 ShiftVal))
4605 return false;
4606
4607 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4608 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4609 return true;
4610}
4611
4612bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4613 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4614 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4615 return false;
4616}
4617
4618bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4619 SDValue &Imm) {
4620 int64_t ImmVal = Val.getSExtValue();
4621 if (ImmVal >= -128 && ImmVal < 128) {
4622 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4623 return true;
4624 }
4625 return false;
4626}
4627
4628bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4629 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4630 uint64_t ImmVal = CNode->getZExtValue();
4631
4632 switch (VT.SimpleTy) {
4633 case MVT::i8:
4634 ImmVal &= 0xFF;
4635 break;
4636 case MVT::i16:
4637 ImmVal &= 0xFFFF;
4638 break;
4639 case MVT::i32:
4640 ImmVal &= 0xFFFFFFFF;
4641 break;
4642 case MVT::i64:
4643 break;
4644 default:
4645 llvm_unreachable("Unexpected type");
4646 }
4647
4648 if (ImmVal < 256) {
4649 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4650 return true;
4651 }
4652 }
4653 return false;
4654}
4655
4656bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4657 bool Invert) {
4658 uint64_t ImmVal;
4659 if (auto CI = dyn_cast<ConstantSDNode>(N))
4660 ImmVal = CI->getZExtValue();
4661 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4662 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4663 else
4664 return false;
4665
4666 if (Invert)
4667 ImmVal = ~ImmVal;
4668
4669 uint64_t encoding;
4670 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4671 return false;
4672
4673 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4674 return true;
4675}
4676
4677// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4678// Rather than attempt to normalise everything we can sometimes saturate the
4679// shift amount during selection. This function also allows for consistent
4680// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4681// required by the instructions.
4682bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4683 uint64_t High, bool AllowSaturation,
4684 SDValue &Imm) {
4685 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4686 uint64_t ImmVal = CN->getZExtValue();
4687
4688 // Reject shift amounts that are too small.
4689 if (ImmVal < Low)
4690 return false;
4691
4692 // Reject or saturate shift amounts that are too big.
4693 if (ImmVal > High) {
4694 if (!AllowSaturation)
4695 return false;
4696 ImmVal = High;
4697 }
4698
4699 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4700 return true;
4701 }
4702
4703 return false;
4704}
4705
4706bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4707 // tagp(FrameIndex, IRGstack, tag_offset):
4708 // since the offset between FrameIndex and IRGstack is a compile-time
4709 // constant, this can be lowered to a single ADDG instruction.
4710 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4711 return false;
4712 }
4713
4714 SDValue IRG_SP = N->getOperand(2);
4715 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4716 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4717 return false;
4718 }
4719
4720 const TargetLowering *TLI = getTargetLowering();
4721 SDLoc DL(N);
4722 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4723 SDValue FiOp = CurDAG->getTargetFrameIndex(
4724 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4725 int TagOffset = N->getConstantOperandVal(3);
4726
4727 SDNode *Out = CurDAG->getMachineNode(
4728 AArch64::TAGPstack, DL, MVT::i64,
4729 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4730 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4731 ReplaceNode(N, Out);
4732 return true;
4733}
4734
4735void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4736 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4737 "llvm.aarch64.tagp third argument must be an immediate");
4738 if (trySelectStackSlotTagP(N))
4739 return;
4740 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4741 // compile-time constant, not just for stack allocations.
4742
4743 // General case for unrelated pointers in Op1 and Op2.
4744 SDLoc DL(N);
4745 int TagOffset = N->getConstantOperandVal(3);
4746 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4747 {N->getOperand(1), N->getOperand(2)});
4748 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4749 {SDValue(N1, 0), N->getOperand(2)});
4750 SDNode *N3 = CurDAG->getMachineNode(
4751 AArch64::ADDG, DL, MVT::i64,
4752 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4753 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4754 ReplaceNode(N, N3);
4755}
4756
4757bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4758 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4759
4760 // Bail when not a "cast" like insert_subvector.
4761 if (N->getConstantOperandVal(2) != 0)
4762 return false;
4763 if (!N->getOperand(0).isUndef())
4764 return false;
4765
4766 // Bail when normal isel should do the job.
4767 EVT VT = N->getValueType(0);
4768 EVT InVT = N->getOperand(1).getValueType();
4769 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4770 return false;
4771 if (InVT.getSizeInBits() <= 128)
4772 return false;
4773
4774 // NOTE: We can only get here when doing fixed length SVE code generation.
4775 // We do manual selection because the types involved are not linked to real
4776 // registers (despite being legal) and must be coerced into SVE registers.
4777
4779 "Expected to insert into a packed scalable vector!");
4780
4781 SDLoc DL(N);
4782 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4783 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4784 N->getOperand(1), RC));
4785 return true;
4786}
4787
4788bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4789 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4790
4791 // Bail when not a "cast" like extract_subvector.
4792 if (N->getConstantOperandVal(1) != 0)
4793 return false;
4794
4795 // Bail when normal isel can do the job.
4796 EVT VT = N->getValueType(0);
4797 EVT InVT = N->getOperand(0).getValueType();
4798 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4799 return false;
4800 if (VT.getSizeInBits() <= 128)
4801 return false;
4802
4803 // NOTE: We can only get here when doing fixed length SVE code generation.
4804 // We do manual selection because the types involved are not linked to real
4805 // registers (despite being legal) and must be coerced into SVE registers.
4806
4808 "Expected to extract from a packed scalable vector!");
4809
4810 SDLoc DL(N);
4811 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4812 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4813 N->getOperand(0), RC));
4814 return true;
4815}
4816
4817bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4818 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4819
4820 SDValue N0 = N->getOperand(0);
4821 SDValue N1 = N->getOperand(1);
4822
4823 EVT VT = N->getValueType(0);
4824 SDLoc DL(N);
4825
4826 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4827 // Rotate by a constant is a funnel shift in IR which is expanded to
4828 // an OR with shifted operands.
4829 // We do the following transform:
4830 // OR N0, N1 -> xar (x, y, imm)
4831 // Where:
4832 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4833 // N0 = SHL_PRED true, V, splat(bits-imm)
4834 // V = (xor x, y)
4835 if (VT.isScalableVector() &&
4836 (Subtarget->hasSVE2() ||
4837 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4838 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4839 N1.getOpcode() != AArch64ISD::SRL_PRED)
4840 std::swap(N0, N1);
4841 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4842 N1.getOpcode() != AArch64ISD::SRL_PRED)
4843 return false;
4844
4845 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4846 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4847 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4848 return false;
4849
4850 if (N0.getOperand(1) != N1.getOperand(1))
4851 return false;
4852
4853 SDValue R1, R2;
4854 bool IsXOROperand = true;
4855 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4856 IsXOROperand = false;
4857 } else {
4858 R1 = N0.getOperand(1).getOperand(0);
4859 R2 = N1.getOperand(1).getOperand(1);
4860 }
4861
4862 APInt ShlAmt, ShrAmt;
4863 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4865 return false;
4866
4867 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4868 return false;
4869
4870 if (!IsXOROperand) {
4871 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4872 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4873 SDValue MOVIV = SDValue(MOV, 0);
4874
4875 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4876 SDNode *SubRegToReg =
4877 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4878
4879 R1 = N1->getOperand(1);
4880 R2 = SDValue(SubRegToReg, 0);
4881 }
4882
4883 SDValue Imm =
4884 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4885
4886 SDValue Ops[] = {R1, R2, Imm};
4888 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4889 AArch64::XAR_ZZZI_D})) {
4890 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4891 return true;
4892 }
4893 return false;
4894 }
4895
4896 // We have Neon SHA3 XAR operation for v2i64 but for types
4897 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4898 // is available.
4899 EVT SVT;
4900 switch (VT.getSimpleVT().SimpleTy) {
4901 case MVT::v4i32:
4902 case MVT::v2i32:
4903 SVT = MVT::nxv4i32;
4904 break;
4905 case MVT::v8i16:
4906 case MVT::v4i16:
4907 SVT = MVT::nxv8i16;
4908 break;
4909 case MVT::v16i8:
4910 case MVT::v8i8:
4911 SVT = MVT::nxv16i8;
4912 break;
4913 case MVT::v2i64:
4914 case MVT::v1i64:
4915 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4916 break;
4917 default:
4918 return false;
4919 }
4920
4921 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4922 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4923 return false;
4924
4925 if (N0->getOpcode() != AArch64ISD::VSHL ||
4926 N1->getOpcode() != AArch64ISD::VLSHR)
4927 return false;
4928
4929 if (N0->getOperand(0) != N1->getOperand(0))
4930 return false;
4931
4932 SDValue R1, R2;
4933 bool IsXOROperand = true;
4934 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4935 IsXOROperand = false;
4936 } else {
4937 SDValue XOR = N0.getOperand(0);
4938 R1 = XOR.getOperand(0);
4939 R2 = XOR.getOperand(1);
4940 }
4941
4942 unsigned HsAmt = N0.getConstantOperandVal(1);
4943 unsigned ShAmt = N1.getConstantOperandVal(1);
4944
4945 SDValue Imm = CurDAG->getTargetConstant(
4946 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4947
4948 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4949 if (ShAmt + HsAmt != VTSizeInBits)
4950 return false;
4951
4952 if (!IsXOROperand) {
4953 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4954 SDNode *MOV =
4955 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4956 SDValue MOVIV = SDValue(MOV, 0);
4957
4958 R1 = N1->getOperand(0);
4959 R2 = MOVIV;
4960 }
4961
4962 if (SVT != VT) {
4963 SDValue Undef =
4964 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4965
4966 if (SVT.isScalableVector() && VT.is64BitVector()) {
4967 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4968
4969 SDValue UndefQ = SDValue(
4970 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4971 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4972
4973 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4974 UndefQ, R1, DSub),
4975 0);
4976 if (R2.getValueType() == VT)
4977 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4978 UndefQ, R2, DSub),
4979 0);
4980 }
4981
4982 SDValue SubReg = CurDAG->getTargetConstant(
4983 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4984
4985 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4986 R1, SubReg),
4987 0);
4988
4989 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4990 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4991 Undef, R2, SubReg),
4992 0);
4993 }
4994
4995 SDValue Ops[] = {R1, R2, Imm};
4996 SDNode *XAR = nullptr;
4997
4998 if (SVT.isScalableVector()) {
5000 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
5001 AArch64::XAR_ZZZI_D}))
5002 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
5003 } else {
5004 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
5005 }
5006
5007 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
5008
5009 if (SVT != VT) {
5010 if (VT.is64BitVector() && SVT.isScalableVector()) {
5011 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
5012
5013 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
5014 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
5015 SDValue(XAR, 0), ZSub);
5016
5017 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
5018 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
5019 SDValue(Q, 0), DSub);
5020 } else {
5021 SDValue SubReg = CurDAG->getTargetConstant(
5022 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
5023 MVT::i32);
5024 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
5025 SDValue(XAR, 0), SubReg);
5026 }
5027 }
5028 ReplaceNode(N, XAR);
5029 return true;
5030}
5031
5032void AArch64DAGToDAGISel::Select(SDNode *Node) {
5033 // If we have a custom node, we already have selected!
5034 if (Node->isMachineOpcode()) {
5035 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
5036 Node->setNodeId(-1);
5037 return;
5038 }
5039
5040 // Few custom selection stuff.
5041 EVT VT = Node->getValueType(0);
5042
5043 switch (Node->getOpcode()) {
5044 default:
5045 break;
5046
5048 if (SelectCMP_SWAP(Node))
5049 return;
5050 break;
5051
5052 case ISD::READ_REGISTER:
5053 case AArch64ISD::MRRS:
5054 if (tryReadRegister(Node))
5055 return;
5056 break;
5057
5059 case AArch64ISD::MSRR:
5060 if (tryWriteRegister(Node))
5061 return;
5062 break;
5063
5064 case ISD::LOAD: {
5065 // Try to select as an indexed load. Fall through to normal processing
5066 // if we can't.
5067 if (tryIndexedLoad(Node))
5068 return;
5069 break;
5070 }
5071
5072 case ISD::SRL:
5073 case ISD::AND:
5074 case ISD::SRA:
5076 if (tryBitfieldExtractOp(Node))
5077 return;
5078 if (tryBitfieldInsertInZeroOp(Node))
5079 return;
5080 [[fallthrough]];
5081 case ISD::ROTR:
5082 case ISD::SHL:
5083 if (tryShiftAmountMod(Node))
5084 return;
5085 break;
5086
5087 case ISD::SIGN_EXTEND:
5088 if (tryBitfieldExtractOpFromSExt(Node))
5089 return;
5090 break;
5091
5092 case ISD::OR:
5093 if (tryBitfieldInsertOp(Node))
5094 return;
5095 if (trySelectXAR(Node))
5096 return;
5097 break;
5098
5100 if (trySelectCastScalableToFixedLengthVector(Node))
5101 return;
5102 break;
5103 }
5104
5105 case ISD::INSERT_SUBVECTOR: {
5106 if (trySelectCastFixedLengthToScalableVector(Node))
5107 return;
5108 break;
5109 }
5110
5111 case ISD::Constant: {
5112 // Materialize zero constants as copies from WZR/XZR. This allows
5113 // the coalescer to propagate these into other instructions.
5114 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5115 if (ConstNode->isZero()) {
5116 if (VT == MVT::i32) {
5117 SDValue New = CurDAG->getCopyFromReg(
5118 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5119 ReplaceNode(Node, New.getNode());
5120 return;
5121 } else if (VT == MVT::i64) {
5122 SDValue New = CurDAG->getCopyFromReg(
5123 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5124 ReplaceNode(Node, New.getNode());
5125 return;
5126 }
5127 }
5128 break;
5129 }
5130
5131 case ISD::FrameIndex: {
5132 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5133 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5134 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5135 const TargetLowering *TLI = getTargetLowering();
5136 SDValue TFI = CurDAG->getTargetFrameIndex(
5137 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5138 SDLoc DL(Node);
5139 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5140 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5141 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5142 return;
5143 }
5145 unsigned IntNo = Node->getConstantOperandVal(1);
5146 switch (IntNo) {
5147 default:
5148 break;
5149 case Intrinsic::aarch64_gcsss: {
5150 SDLoc DL(Node);
5151 SDValue Chain = Node->getOperand(0);
5152 SDValue Val = Node->getOperand(2);
5153 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5154 SDNode *SS1 =
5155 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5156 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5157 MVT::Other, Zero, SDValue(SS1, 0));
5158 ReplaceNode(Node, SS2);
5159 return;
5160 }
5161 case Intrinsic::aarch64_ldaxp:
5162 case Intrinsic::aarch64_ldxp: {
5163 unsigned Op =
5164 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5165 SDValue MemAddr = Node->getOperand(2);
5166 SDLoc DL(Node);
5167 SDValue Chain = Node->getOperand(0);
5168
5169 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5170 MVT::Other, MemAddr, Chain);
5171
5172 // Transfer memoperands.
5173 MachineMemOperand *MemOp =
5174 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5175 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5176 ReplaceNode(Node, Ld);
5177 return;
5178 }
5179 case Intrinsic::aarch64_stlxp:
5180 case Intrinsic::aarch64_stxp: {
5181 unsigned Op =
5182 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5183 SDLoc DL(Node);
5184 SDValue Chain = Node->getOperand(0);
5185 SDValue ValLo = Node->getOperand(2);
5186 SDValue ValHi = Node->getOperand(3);
5187 SDValue MemAddr = Node->getOperand(4);
5188
5189 // Place arguments in the right order.
5190 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5191
5192 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5193 // Transfer memoperands.
5194 MachineMemOperand *MemOp =
5195 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5196 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5197
5198 ReplaceNode(Node, St);
5199 return;
5200 }
5201 case Intrinsic::aarch64_neon_ld1x2:
5202 if (VT == MVT::v8i8) {
5203 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5204 return;
5205 } else if (VT == MVT::v16i8) {
5206 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5207 return;
5208 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5209 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5210 return;
5211 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5212 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5213 return;
5214 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5215 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5216 return;
5217 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5218 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5219 return;
5220 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5221 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5222 return;
5223 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5224 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5225 return;
5226 }
5227 break;
5228 case Intrinsic::aarch64_neon_ld1x3:
5229 if (VT == MVT::v8i8) {
5230 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5231 return;
5232 } else if (VT == MVT::v16i8) {
5233 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5234 return;
5235 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5236 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5237 return;
5238 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5239 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5240 return;
5241 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5242 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5243 return;
5244 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5245 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5246 return;
5247 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5248 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5249 return;
5250 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5251 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5252 return;
5253 }
5254 break;
5255 case Intrinsic::aarch64_neon_ld1x4:
5256 if (VT == MVT::v8i8) {
5257 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5258 return;
5259 } else if (VT == MVT::v16i8) {
5260 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5261 return;
5262 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5263 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5264 return;
5265 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5266 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5267 return;
5268 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5269 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5270 return;
5271 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5272 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5273 return;
5274 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5275 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5276 return;
5277 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5278 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5279 return;
5280 }
5281 break;
5282 case Intrinsic::aarch64_neon_ld2:
5283 if (VT == MVT::v8i8) {
5284 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5285 return;
5286 } else if (VT == MVT::v16i8) {
5287 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5288 return;
5289 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5290 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5291 return;
5292 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5293 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5294 return;
5295 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5296 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5297 return;
5298 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5299 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5300 return;
5301 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5302 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5303 return;
5304 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5305 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5306 return;
5307 }
5308 break;
5309 case Intrinsic::aarch64_neon_ld3:
5310 if (VT == MVT::v8i8) {
5311 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5312 return;
5313 } else if (VT == MVT::v16i8) {
5314 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5315 return;
5316 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5317 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5318 return;
5319 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5320 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5321 return;
5322 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5323 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5324 return;
5325 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5326 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5327 return;
5328 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5329 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5330 return;
5331 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5332 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5333 return;
5334 }
5335 break;
5336 case Intrinsic::aarch64_neon_ld4:
5337 if (VT == MVT::v8i8) {
5338 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5339 return;
5340 } else if (VT == MVT::v16i8) {
5341 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5342 return;
5343 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5344 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5345 return;
5346 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5347 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5348 return;
5349 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5350 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5351 return;
5352 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5353 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5354 return;
5355 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5356 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5357 return;
5358 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5359 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5360 return;
5361 }
5362 break;
5363 case Intrinsic::aarch64_neon_ld2r:
5364 if (VT == MVT::v8i8) {
5365 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5366 return;
5367 } else if (VT == MVT::v16i8) {
5368 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5369 return;
5370 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5371 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5372 return;
5373 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5374 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5375 return;
5376 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5377 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5378 return;
5379 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5380 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5381 return;
5382 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5383 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5384 return;
5385 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5386 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5387 return;
5388 }
5389 break;
5390 case Intrinsic::aarch64_neon_ld3r:
5391 if (VT == MVT::v8i8) {
5392 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5393 return;
5394 } else if (VT == MVT::v16i8) {
5395 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5396 return;
5397 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5398 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5399 return;
5400 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5401 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5402 return;
5403 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5404 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5405 return;
5406 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5407 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5408 return;
5409 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5410 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5411 return;
5412 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5413 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5414 return;
5415 }
5416 break;
5417 case Intrinsic::aarch64_neon_ld4r:
5418 if (VT == MVT::v8i8) {
5419 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5420 return;
5421 } else if (VT == MVT::v16i8) {
5422 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5423 return;
5424 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5425 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5426 return;
5427 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5428 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5429 return;
5430 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5431 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5432 return;
5433 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5434 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5435 return;
5436 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5437 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5438 return;
5439 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5440 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5441 return;
5442 }
5443 break;
5444 case Intrinsic::aarch64_neon_ld2lane:
5445 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5446 SelectLoadLane(Node, 2, AArch64::LD2i8);
5447 return;
5448 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5449 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5450 SelectLoadLane(Node, 2, AArch64::LD2i16);
5451 return;
5452 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5453 VT == MVT::v2f32) {
5454 SelectLoadLane(Node, 2, AArch64::LD2i32);
5455 return;
5456 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5457 VT == MVT::v1f64) {
5458 SelectLoadLane(Node, 2, AArch64::LD2i64);
5459 return;
5460 }
5461 break;
5462 case Intrinsic::aarch64_neon_ld3lane:
5463 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5464 SelectLoadLane(Node, 3, AArch64::LD3i8);
5465 return;
5466 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5467 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5468 SelectLoadLane(Node, 3, AArch64::LD3i16);
5469 return;
5470 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5471 VT == MVT::v2f32) {
5472 SelectLoadLane(Node, 3, AArch64::LD3i32);
5473 return;
5474 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5475 VT == MVT::v1f64) {
5476 SelectLoadLane(Node, 3, AArch64::LD3i64);
5477 return;
5478 }
5479 break;
5480 case Intrinsic::aarch64_neon_ld4lane:
5481 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5482 SelectLoadLane(Node, 4, AArch64::LD4i8);
5483 return;
5484 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5485 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5486 SelectLoadLane(Node, 4, AArch64::LD4i16);
5487 return;
5488 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5489 VT == MVT::v2f32) {
5490 SelectLoadLane(Node, 4, AArch64::LD4i32);
5491 return;
5492 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5493 VT == MVT::v1f64) {
5494 SelectLoadLane(Node, 4, AArch64::LD4i64);
5495 return;
5496 }
5497 break;
5498 case Intrinsic::aarch64_ld64b:
5499 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5500 return;
5501 case Intrinsic::aarch64_sve_ld2q_sret: {
5502 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5503 return;
5504 }
5505 case Intrinsic::aarch64_sve_ld3q_sret: {
5506 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5507 return;
5508 }
5509 case Intrinsic::aarch64_sve_ld4q_sret: {
5510 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5511 return;
5512 }
5513 case Intrinsic::aarch64_sve_ld2_sret: {
5514 if (VT == MVT::nxv16i8) {
5515 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5516 true);
5517 return;
5518 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5519 VT == MVT::nxv8bf16) {
5520 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5521 true);
5522 return;
5523 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5524 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5525 true);
5526 return;
5527 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5528 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5529 true);
5530 return;
5531 }
5532 break;
5533 }
5534 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5535 if (VT == MVT::nxv16i8) {
5536 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5537 SelectContiguousMultiVectorLoad(
5538 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5539 else if (Subtarget->hasSVE2p1())
5540 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5541 AArch64::LD1B_2Z);
5542 else
5543 break;
5544 return;
5545 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5546 VT == MVT::nxv8bf16) {
5547 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5548 SelectContiguousMultiVectorLoad(
5549 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5550 else if (Subtarget->hasSVE2p1())
5551 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5552 AArch64::LD1H_2Z);
5553 else
5554 break;
5555 return;
5556 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5557 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5558 SelectContiguousMultiVectorLoad(
5559 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5560 else if (Subtarget->hasSVE2p1())
5561 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5562 AArch64::LD1W_2Z);
5563 else
5564 break;
5565 return;
5566 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5567 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5568 SelectContiguousMultiVectorLoad(
5569 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5570 else if (Subtarget->hasSVE2p1())
5571 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5572 AArch64::LD1D_2Z);
5573 else
5574 break;
5575 return;
5576 }
5577 break;
5578 }
5579 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5580 if (VT == MVT::nxv16i8) {
5581 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5582 SelectContiguousMultiVectorLoad(
5583 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5584 else if (Subtarget->hasSVE2p1())
5585 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5586 AArch64::LD1B_4Z);
5587 else
5588 break;
5589 return;
5590 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5591 VT == MVT::nxv8bf16) {
5592 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5593 SelectContiguousMultiVectorLoad(
5594 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5595 else if (Subtarget->hasSVE2p1())
5596 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5597 AArch64::LD1H_4Z);
5598 else
5599 break;
5600 return;
5601 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5602 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5603 SelectContiguousMultiVectorLoad(
5604 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5605 else if (Subtarget->hasSVE2p1())
5606 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5607 AArch64::LD1W_4Z);
5608 else
5609 break;
5610 return;
5611 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5612 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5613 SelectContiguousMultiVectorLoad(
5614 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5615 else if (Subtarget->hasSVE2p1())
5616 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5617 AArch64::LD1D_4Z);
5618 else
5619 break;
5620 return;
5621 }
5622 break;
5623 }
5624 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5625 if (VT == MVT::nxv16i8) {
5626 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5627 SelectContiguousMultiVectorLoad(Node, 2, 0,
5628 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5629 AArch64::LDNT1B_2Z_PSEUDO);
5630 else if (Subtarget->hasSVE2p1())
5631 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5632 AArch64::LDNT1B_2Z);
5633 else
5634 break;
5635 return;
5636 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5637 VT == MVT::nxv8bf16) {
5638 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5639 SelectContiguousMultiVectorLoad(Node, 2, 1,
5640 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5641 AArch64::LDNT1H_2Z_PSEUDO);
5642 else if (Subtarget->hasSVE2p1())
5643 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5644 AArch64::LDNT1H_2Z);
5645 else
5646 break;
5647 return;
5648 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5649 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5650 SelectContiguousMultiVectorLoad(Node, 2, 2,
5651 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5652 AArch64::LDNT1W_2Z_PSEUDO);
5653 else if (Subtarget->hasSVE2p1())
5654 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5655 AArch64::LDNT1W_2Z);
5656 else
5657 break;
5658 return;
5659 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5660 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5661 SelectContiguousMultiVectorLoad(Node, 2, 3,
5662 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5663 AArch64::LDNT1D_2Z_PSEUDO);
5664 else if (Subtarget->hasSVE2p1())
5665 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5666 AArch64::LDNT1D_2Z);
5667 else
5668 break;
5669 return;
5670 }
5671 break;
5672 }
5673 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5674 if (VT == MVT::nxv16i8) {
5675 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5676 SelectContiguousMultiVectorLoad(Node, 4, 0,
5677 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5678 AArch64::LDNT1B_4Z_PSEUDO);
5679 else if (Subtarget->hasSVE2p1())
5680 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5681 AArch64::LDNT1B_4Z);
5682 else
5683 break;
5684 return;
5685 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5686 VT == MVT::nxv8bf16) {
5687 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5688 SelectContiguousMultiVectorLoad(Node, 4, 1,
5689 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5690 AArch64::LDNT1H_4Z_PSEUDO);
5691 else if (Subtarget->hasSVE2p1())
5692 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5693 AArch64::LDNT1H_4Z);
5694 else
5695 break;
5696 return;
5697 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5698 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5699 SelectContiguousMultiVectorLoad(Node, 4, 2,
5700 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5701 AArch64::LDNT1W_4Z_PSEUDO);
5702 else if (Subtarget->hasSVE2p1())
5703 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5704 AArch64::LDNT1W_4Z);
5705 else
5706 break;
5707 return;
5708 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5709 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5710 SelectContiguousMultiVectorLoad(Node, 4, 3,
5711 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5712 AArch64::LDNT1D_4Z_PSEUDO);
5713 else if (Subtarget->hasSVE2p1())
5714 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5715 AArch64::LDNT1D_4Z);
5716 else
5717 break;
5718 return;
5719 }
5720 break;
5721 }
5722 case Intrinsic::aarch64_sve_ld3_sret: {
5723 if (VT == MVT::nxv16i8) {
5724 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5725 true);
5726 return;
5727 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5728 VT == MVT::nxv8bf16) {
5729 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5730 true);
5731 return;
5732 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5733 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5734 true);
5735 return;
5736 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5737 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5738 true);
5739 return;
5740 }
5741 break;
5742 }
5743 case Intrinsic::aarch64_sve_ld4_sret: {
5744 if (VT == MVT::nxv16i8) {
5745 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5746 true);
5747 return;
5748 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5749 VT == MVT::nxv8bf16) {
5750 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5751 true);
5752 return;
5753 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5754 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5755 true);
5756 return;
5757 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5758 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5759 true);
5760 return;
5761 }
5762 break;
5763 }
5764 case Intrinsic::aarch64_sme_read_hor_vg2: {
5765 if (VT == MVT::nxv16i8) {
5766 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5767 AArch64::MOVA_2ZMXI_H_B);
5768 return;
5769 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5770 VT == MVT::nxv8bf16) {
5771 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5772 AArch64::MOVA_2ZMXI_H_H);
5773 return;
5774 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5775 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5776 AArch64::MOVA_2ZMXI_H_S);
5777 return;
5778 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5779 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5780 AArch64::MOVA_2ZMXI_H_D);
5781 return;
5782 }
5783 break;
5784 }
5785 case Intrinsic::aarch64_sme_read_ver_vg2: {
5786 if (VT == MVT::nxv16i8) {
5787 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5788 AArch64::MOVA_2ZMXI_V_B);
5789 return;
5790 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5791 VT == MVT::nxv8bf16) {
5792 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5793 AArch64::MOVA_2ZMXI_V_H);
5794 return;
5795 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5796 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5797 AArch64::MOVA_2ZMXI_V_S);
5798 return;
5799 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5800 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5801 AArch64::MOVA_2ZMXI_V_D);
5802 return;
5803 }
5804 break;
5805 }
5806 case Intrinsic::aarch64_sme_read_hor_vg4: {
5807 if (VT == MVT::nxv16i8) {
5808 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5809 AArch64::MOVA_4ZMXI_H_B);
5810 return;
5811 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5812 VT == MVT::nxv8bf16) {
5813 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5814 AArch64::MOVA_4ZMXI_H_H);
5815 return;
5816 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5817 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5818 AArch64::MOVA_4ZMXI_H_S);
5819 return;
5820 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5821 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5822 AArch64::MOVA_4ZMXI_H_D);
5823 return;
5824 }
5825 break;
5826 }
5827 case Intrinsic::aarch64_sme_read_ver_vg4: {
5828 if (VT == MVT::nxv16i8) {
5829 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5830 AArch64::MOVA_4ZMXI_V_B);
5831 return;
5832 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5833 VT == MVT::nxv8bf16) {
5834 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5835 AArch64::MOVA_4ZMXI_V_H);
5836 return;
5837 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5838 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5839 AArch64::MOVA_4ZMXI_V_S);
5840 return;
5841 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5842 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5843 AArch64::MOVA_4ZMXI_V_D);
5844 return;
5845 }
5846 break;
5847 }
5848 case Intrinsic::aarch64_sme_read_vg1x2: {
5849 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5850 AArch64::MOVA_VG2_2ZMXI);
5851 return;
5852 }
5853 case Intrinsic::aarch64_sme_read_vg1x4: {
5854 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5855 AArch64::MOVA_VG4_4ZMXI);
5856 return;
5857 }
5858 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5859 if (VT == MVT::nxv16i8) {
5860 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5861 return;
5862 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5863 VT == MVT::nxv8bf16) {
5864 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5865 return;
5866 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5867 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5868 return;
5869 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5870 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5871 return;
5872 }
5873 break;
5874 }
5875 case Intrinsic::aarch64_sme_readz_vert_x2: {
5876 if (VT == MVT::nxv16i8) {
5877 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5878 return;
5879 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5880 VT == MVT::nxv8bf16) {
5881 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5882 return;
5883 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5884 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5885 return;
5886 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5887 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5888 return;
5889 }
5890 break;
5891 }
5892 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5893 if (VT == MVT::nxv16i8) {
5894 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5895 return;
5896 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5897 VT == MVT::nxv8bf16) {
5898 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5899 return;
5900 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5901 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5902 return;
5903 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5904 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5905 return;
5906 }
5907 break;
5908 }
5909 case Intrinsic::aarch64_sme_readz_vert_x4: {
5910 if (VT == MVT::nxv16i8) {
5911 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5912 return;
5913 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5914 VT == MVT::nxv8bf16) {
5915 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5916 return;
5917 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5918 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5919 return;
5920 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5921 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5922 return;
5923 }
5924 break;
5925 }
5926 case Intrinsic::aarch64_sme_readz_x2: {
5927 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5928 AArch64::ZA);
5929 return;
5930 }
5931 case Intrinsic::aarch64_sme_readz_x4: {
5932 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5933 AArch64::ZA);
5934 return;
5935 }
5936 case Intrinsic::swift_async_context_addr: {
5937 SDLoc DL(Node);
5938 SDValue Chain = Node->getOperand(0);
5939 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5940 SDValue Res = SDValue(
5941 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5942 CurDAG->getTargetConstant(8, DL, MVT::i32),
5943 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5944 0);
5945 ReplaceUses(SDValue(Node, 0), Res);
5946 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5947 CurDAG->RemoveDeadNode(Node);
5948
5949 auto &MF = CurDAG->getMachineFunction();
5950 MF.getFrameInfo().setFrameAddressIsTaken(true);
5951 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5952 return;
5953 }
5954 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5956 Node->getValueType(0),
5957 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5958 AArch64::LUTI2_4ZTZI_S}))
5959 // Second Immediate must be <= 3:
5960 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5961 return;
5962 }
5963 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5965 Node->getValueType(0),
5966 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5967 // Second Immediate must be <= 1:
5968 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5969 return;
5970 }
5971 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5973 Node->getValueType(0),
5974 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5975 AArch64::LUTI2_2ZTZI_S}))
5976 // Second Immediate must be <= 7:
5977 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5978 return;
5979 }
5980 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5982 Node->getValueType(0),
5983 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5984 AArch64::LUTI4_2ZTZI_S}))
5985 // Second Immediate must be <= 3:
5986 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5987 return;
5988 }
5989 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5990 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5991 return;
5992 }
5993 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5995 Node->getValueType(0),
5996 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5997 SelectCVTIntrinsicFP8(Node, 2, Opc);
5998 return;
5999 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
6001 Node->getValueType(0),
6002 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
6003 SelectCVTIntrinsicFP8(Node, 2, Opc);
6004 return;
6005 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
6007 Node->getValueType(0),
6008 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
6009 SelectCVTIntrinsicFP8(Node, 2, Opc);
6010 return;
6011 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
6013 Node->getValueType(0),
6014 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
6015 SelectCVTIntrinsicFP8(Node, 2, Opc);
6016 return;
6017 case Intrinsic::ptrauth_resign_load_relative:
6018 SelectPtrauthResign(Node);
6019 return;
6020 }
6021 } break;
6023 unsigned IntNo = Node->getConstantOperandVal(0);
6024 switch (IntNo) {
6025 default:
6026 break;
6027 case Intrinsic::aarch64_tagp:
6028 SelectTagP(Node);
6029 return;
6030
6031 case Intrinsic::ptrauth_auth:
6032 SelectPtrauthAuth(Node);
6033 return;
6034
6035 case Intrinsic::ptrauth_resign:
6036 SelectPtrauthResign(Node);
6037 return;
6038
6039 case Intrinsic::aarch64_neon_tbl2:
6040 SelectTable(Node, 2,
6041 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
6042 false);
6043 return;
6044 case Intrinsic::aarch64_neon_tbl3:
6045 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
6046 : AArch64::TBLv16i8Three,
6047 false);
6048 return;
6049 case Intrinsic::aarch64_neon_tbl4:
6050 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
6051 : AArch64::TBLv16i8Four,
6052 false);
6053 return;
6054 case Intrinsic::aarch64_neon_tbx2:
6055 SelectTable(Node, 2,
6056 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
6057 true);
6058 return;
6059 case Intrinsic::aarch64_neon_tbx3:
6060 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
6061 : AArch64::TBXv16i8Three,
6062 true);
6063 return;
6064 case Intrinsic::aarch64_neon_tbx4:
6065 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
6066 : AArch64::TBXv16i8Four,
6067 true);
6068 return;
6069 case Intrinsic::aarch64_sve_srshl_single_x2:
6071 Node->getValueType(0),
6072 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
6073 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6074 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6075 return;
6076 case Intrinsic::aarch64_sve_srshl_single_x4:
6078 Node->getValueType(0),
6079 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6080 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6081 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6082 return;
6083 case Intrinsic::aarch64_sve_urshl_single_x2:
6085 Node->getValueType(0),
6086 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6087 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6088 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6089 return;
6090 case Intrinsic::aarch64_sve_urshl_single_x4:
6092 Node->getValueType(0),
6093 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6094 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6095 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6096 return;
6097 case Intrinsic::aarch64_sve_srshl_x2:
6099 Node->getValueType(0),
6100 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6101 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6102 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_srshl_x4:
6106 Node->getValueType(0),
6107 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6108 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6109 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6110 return;
6111 case Intrinsic::aarch64_sve_urshl_x2:
6113 Node->getValueType(0),
6114 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6115 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6116 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6117 return;
6118 case Intrinsic::aarch64_sve_urshl_x4:
6120 Node->getValueType(0),
6121 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6122 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6123 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6124 return;
6125 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6127 Node->getValueType(0),
6128 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6129 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6130 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6131 return;
6132 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6134 Node->getValueType(0),
6135 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6136 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6137 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6138 return;
6139 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6141 Node->getValueType(0),
6142 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6143 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6144 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6145 return;
6146 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6148 Node->getValueType(0),
6149 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6150 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6151 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6152 return;
6153 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6155 Node->getValueType(0),
6156 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6157 AArch64::FSCALE_2ZZ_D}))
6158 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6159 return;
6160 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6162 Node->getValueType(0),
6163 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6164 AArch64::FSCALE_4ZZ_D}))
6165 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6166 return;
6167 case Intrinsic::aarch64_sme_fp8_scale_x2:
6169 Node->getValueType(0),
6170 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6171 AArch64::FSCALE_2Z2Z_D}))
6172 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6173 return;
6174 case Intrinsic::aarch64_sme_fp8_scale_x4:
6176 Node->getValueType(0),
6177 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6178 AArch64::FSCALE_4Z4Z_D}))
6179 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_whilege_x2:
6183 Node->getValueType(0),
6184 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6185 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6186 SelectWhilePair(Node, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_whilegt_x2:
6190 Node->getValueType(0),
6191 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6192 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6193 SelectWhilePair(Node, Op);
6194 return;
6195 case Intrinsic::aarch64_sve_whilehi_x2:
6197 Node->getValueType(0),
6198 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6199 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6200 SelectWhilePair(Node, Op);
6201 return;
6202 case Intrinsic::aarch64_sve_whilehs_x2:
6204 Node->getValueType(0),
6205 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6206 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6207 SelectWhilePair(Node, Op);
6208 return;
6209 case Intrinsic::aarch64_sve_whilele_x2:
6211 Node->getValueType(0),
6212 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6213 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6214 SelectWhilePair(Node, Op);
6215 return;
6216 case Intrinsic::aarch64_sve_whilelo_x2:
6218 Node->getValueType(0),
6219 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6220 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6221 SelectWhilePair(Node, Op);
6222 return;
6223 case Intrinsic::aarch64_sve_whilels_x2:
6225 Node->getValueType(0),
6226 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6227 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6228 SelectWhilePair(Node, Op);
6229 return;
6230 case Intrinsic::aarch64_sve_whilelt_x2:
6232 Node->getValueType(0),
6233 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6234 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6235 SelectWhilePair(Node, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_smax_single_x2:
6239 Node->getValueType(0),
6240 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6241 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6242 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_umax_single_x2:
6246 Node->getValueType(0),
6247 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6248 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6249 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_fmax_single_x2:
6253 Node->getValueType(0),
6254 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6255 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6256 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6257 return;
6258 case Intrinsic::aarch64_sve_smax_single_x4:
6260 Node->getValueType(0),
6261 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6262 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6263 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6264 return;
6265 case Intrinsic::aarch64_sve_umax_single_x4:
6267 Node->getValueType(0),
6268 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6269 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6270 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6271 return;
6272 case Intrinsic::aarch64_sve_fmax_single_x4:
6274 Node->getValueType(0),
6275 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6276 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6277 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6278 return;
6279 case Intrinsic::aarch64_sve_smin_single_x2:
6281 Node->getValueType(0),
6282 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6283 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6284 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6285 return;
6286 case Intrinsic::aarch64_sve_umin_single_x2:
6288 Node->getValueType(0),
6289 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6290 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6291 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6292 return;
6293 case Intrinsic::aarch64_sve_fmin_single_x2:
6295 Node->getValueType(0),
6296 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6297 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6298 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6299 return;
6300 case Intrinsic::aarch64_sve_smin_single_x4:
6302 Node->getValueType(0),
6303 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6304 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6305 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6306 return;
6307 case Intrinsic::aarch64_sve_umin_single_x4:
6309 Node->getValueType(0),
6310 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6311 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6312 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6313 return;
6314 case Intrinsic::aarch64_sve_fmin_single_x4:
6316 Node->getValueType(0),
6317 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6318 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6319 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6320 return;
6321 case Intrinsic::aarch64_sve_smax_x2:
6323 Node->getValueType(0),
6324 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6325 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6326 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6327 return;
6328 case Intrinsic::aarch64_sve_umax_x2:
6330 Node->getValueType(0),
6331 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6332 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6333 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6334 return;
6335 case Intrinsic::aarch64_sve_fmax_x2:
6337 Node->getValueType(0),
6338 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6339 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6340 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6341 return;
6342 case Intrinsic::aarch64_sve_smax_x4:
6344 Node->getValueType(0),
6345 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6346 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6347 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6348 return;
6349 case Intrinsic::aarch64_sve_umax_x4:
6351 Node->getValueType(0),
6352 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6353 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6354 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6355 return;
6356 case Intrinsic::aarch64_sve_fmax_x4:
6358 Node->getValueType(0),
6359 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6360 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6361 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6362 return;
6363 case Intrinsic::aarch64_sme_famax_x2:
6365 Node->getValueType(0),
6366 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6367 AArch64::FAMAX_2Z2Z_D}))
6368 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6369 return;
6370 case Intrinsic::aarch64_sme_famax_x4:
6372 Node->getValueType(0),
6373 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6374 AArch64::FAMAX_4Z4Z_D}))
6375 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6376 return;
6377 case Intrinsic::aarch64_sme_famin_x2:
6379 Node->getValueType(0),
6380 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6381 AArch64::FAMIN_2Z2Z_D}))
6382 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6383 return;
6384 case Intrinsic::aarch64_sme_famin_x4:
6386 Node->getValueType(0),
6387 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6388 AArch64::FAMIN_4Z4Z_D}))
6389 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6390 return;
6391 case Intrinsic::aarch64_sve_smin_x2:
6393 Node->getValueType(0),
6394 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6395 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6396 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6397 return;
6398 case Intrinsic::aarch64_sve_umin_x2:
6400 Node->getValueType(0),
6401 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6402 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6403 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6404 return;
6405 case Intrinsic::aarch64_sve_fmin_x2:
6407 Node->getValueType(0),
6408 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6409 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6410 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6411 return;
6412 case Intrinsic::aarch64_sve_smin_x4:
6414 Node->getValueType(0),
6415 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6416 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6417 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6418 return;
6419 case Intrinsic::aarch64_sve_umin_x4:
6421 Node->getValueType(0),
6422 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6423 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6424 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6425 return;
6426 case Intrinsic::aarch64_sve_fmin_x4:
6428 Node->getValueType(0),
6429 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6430 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6431 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6432 return;
6433 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6435 Node->getValueType(0),
6436 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6437 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6438 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6439 return;
6440 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6442 Node->getValueType(0),
6443 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6444 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6445 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6446 return;
6447 case Intrinsic::aarch64_sve_fminnm_single_x2:
6449 Node->getValueType(0),
6450 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6451 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6452 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6453 return;
6454 case Intrinsic::aarch64_sve_fminnm_single_x4:
6456 Node->getValueType(0),
6457 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6458 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6459 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6460 return;
6461 case Intrinsic::aarch64_sve_fscale_single_x4:
6462 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6463 return;
6464 case Intrinsic::aarch64_sve_fscale_single_x2:
6465 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6466 return;
6467 case Intrinsic::aarch64_sve_fmul_single_x4:
6469 Node->getValueType(0),
6470 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6471 AArch64::FMUL_4ZZ_D}))
6472 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6473 return;
6474 case Intrinsic::aarch64_sve_fmul_single_x2:
6476 Node->getValueType(0),
6477 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6478 AArch64::FMUL_2ZZ_D}))
6479 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6480 return;
6481 case Intrinsic::aarch64_sve_fmaxnm_x2:
6483 Node->getValueType(0),
6484 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6485 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6486 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6487 return;
6488 case Intrinsic::aarch64_sve_fmaxnm_x4:
6490 Node->getValueType(0),
6491 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6492 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6493 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6494 return;
6495 case Intrinsic::aarch64_sve_fminnm_x2:
6497 Node->getValueType(0),
6498 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6499 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6500 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6501 return;
6502 case Intrinsic::aarch64_sve_fminnm_x4:
6504 Node->getValueType(0),
6505 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6506 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6507 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6508 return;
6509 case Intrinsic::aarch64_sve_aese_lane_x2:
6510 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6511 return;
6512 case Intrinsic::aarch64_sve_aesd_lane_x2:
6513 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6514 return;
6515 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6516 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6517 return;
6518 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6519 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6520 return;
6521 case Intrinsic::aarch64_sve_aese_lane_x4:
6522 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6523 return;
6524 case Intrinsic::aarch64_sve_aesd_lane_x4:
6525 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6526 return;
6527 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6528 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6529 return;
6530 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6531 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6532 return;
6533 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6534 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6535 return;
6536 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6537 SDLoc DL(Node);
6538 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6539 SDNode *Res =
6540 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6541 SDValue SuperReg = SDValue(Res, 0);
6542 for (unsigned I = 0; I < 2; I++)
6543 ReplaceUses(SDValue(Node, I),
6544 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6545 SuperReg));
6546 CurDAG->RemoveDeadNode(Node);
6547 return;
6548 }
6549 case Intrinsic::aarch64_sve_fscale_x4:
6550 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6551 return;
6552 case Intrinsic::aarch64_sve_fscale_x2:
6553 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6554 return;
6555 case Intrinsic::aarch64_sve_fmul_x4:
6557 Node->getValueType(0),
6558 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6559 AArch64::FMUL_4Z4Z_D}))
6560 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6561 return;
6562 case Intrinsic::aarch64_sve_fmul_x2:
6564 Node->getValueType(0),
6565 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6566 AArch64::FMUL_2Z2Z_D}))
6567 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6568 return;
6569 case Intrinsic::aarch64_sve_fcvtzs_x2:
6570 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6571 return;
6572 case Intrinsic::aarch64_sve_scvtf_x2:
6573 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6574 return;
6575 case Intrinsic::aarch64_sve_fcvtzu_x2:
6576 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6577 return;
6578 case Intrinsic::aarch64_sve_ucvtf_x2:
6579 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6580 return;
6581 case Intrinsic::aarch64_sve_fcvtzs_x4:
6582 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6583 return;
6584 case Intrinsic::aarch64_sve_scvtf_x4:
6585 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6586 return;
6587 case Intrinsic::aarch64_sve_fcvtzu_x4:
6588 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6589 return;
6590 case Intrinsic::aarch64_sve_ucvtf_x4:
6591 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6592 return;
6593 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6594 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6595 return;
6596 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6597 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6598 return;
6599 case Intrinsic::aarch64_sve_sclamp_single_x2:
6601 Node->getValueType(0),
6602 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6603 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6604 SelectClamp(Node, 2, Op);
6605 return;
6606 case Intrinsic::aarch64_sve_uclamp_single_x2:
6608 Node->getValueType(0),
6609 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6610 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6611 SelectClamp(Node, 2, Op);
6612 return;
6613 case Intrinsic::aarch64_sve_fclamp_single_x2:
6615 Node->getValueType(0),
6616 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6617 AArch64::FCLAMP_VG2_2Z2Z_D}))
6618 SelectClamp(Node, 2, Op);
6619 return;
6620 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6621 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6622 return;
6623 case Intrinsic::aarch64_sve_sclamp_single_x4:
6625 Node->getValueType(0),
6626 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6627 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6628 SelectClamp(Node, 4, Op);
6629 return;
6630 case Intrinsic::aarch64_sve_uclamp_single_x4:
6632 Node->getValueType(0),
6633 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6634 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6635 SelectClamp(Node, 4, Op);
6636 return;
6637 case Intrinsic::aarch64_sve_fclamp_single_x4:
6639 Node->getValueType(0),
6640 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6641 AArch64::FCLAMP_VG4_4Z4Z_D}))
6642 SelectClamp(Node, 4, Op);
6643 return;
6644 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6645 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6646 return;
6647 case Intrinsic::aarch64_sve_add_single_x2:
6649 Node->getValueType(0),
6650 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6651 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6652 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6653 return;
6654 case Intrinsic::aarch64_sve_add_single_x4:
6656 Node->getValueType(0),
6657 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6658 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6659 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6660 return;
6661 case Intrinsic::aarch64_sve_zip_x2:
6663 Node->getValueType(0),
6664 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6665 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6666 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6667 return;
6668 case Intrinsic::aarch64_sve_zipq_x2:
6669 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6670 AArch64::ZIP_VG2_2ZZZ_Q);
6671 return;
6672 case Intrinsic::aarch64_sve_zip_x4:
6674 Node->getValueType(0),
6675 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6676 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6677 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6678 return;
6679 case Intrinsic::aarch64_sve_zipq_x4:
6680 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6681 AArch64::ZIP_VG4_4Z4Z_Q);
6682 return;
6683 case Intrinsic::aarch64_sve_uzp_x2:
6685 Node->getValueType(0),
6686 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6687 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6688 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6689 return;
6690 case Intrinsic::aarch64_sve_uzpq_x2:
6691 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6692 AArch64::UZP_VG2_2ZZZ_Q);
6693 return;
6694 case Intrinsic::aarch64_sve_uzp_x4:
6696 Node->getValueType(0),
6697 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6698 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6699 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6700 return;
6701 case Intrinsic::aarch64_sve_uzpq_x4:
6702 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6703 AArch64::UZP_VG4_4Z4Z_Q);
6704 return;
6705 case Intrinsic::aarch64_sve_sel_x2:
6707 Node->getValueType(0),
6708 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6709 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6710 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6711 return;
6712 case Intrinsic::aarch64_sve_sel_x4:
6714 Node->getValueType(0),
6715 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6716 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6717 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6718 return;
6719 case Intrinsic::aarch64_sve_frinta_x2:
6720 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6721 return;
6722 case Intrinsic::aarch64_sve_frinta_x4:
6723 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6724 return;
6725 case Intrinsic::aarch64_sve_frintm_x2:
6726 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6727 return;
6728 case Intrinsic::aarch64_sve_frintm_x4:
6729 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6730 return;
6731 case Intrinsic::aarch64_sve_frintn_x2:
6732 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6733 return;
6734 case Intrinsic::aarch64_sve_frintn_x4:
6735 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6736 return;
6737 case Intrinsic::aarch64_sve_frintp_x2:
6738 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6739 return;
6740 case Intrinsic::aarch64_sve_frintp_x4:
6741 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6742 return;
6743 case Intrinsic::aarch64_sve_sunpk_x2:
6745 Node->getValueType(0),
6746 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6747 AArch64::SUNPK_VG2_2ZZ_D}))
6748 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6749 return;
6750 case Intrinsic::aarch64_sve_uunpk_x2:
6752 Node->getValueType(0),
6753 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6754 AArch64::UUNPK_VG2_2ZZ_D}))
6755 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6756 return;
6757 case Intrinsic::aarch64_sve_sunpk_x4:
6759 Node->getValueType(0),
6760 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6761 AArch64::SUNPK_VG4_4Z2Z_D}))
6762 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6763 return;
6764 case Intrinsic::aarch64_sve_uunpk_x4:
6766 Node->getValueType(0),
6767 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6768 AArch64::UUNPK_VG4_4Z2Z_D}))
6769 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6770 return;
6771 case Intrinsic::aarch64_sve_pext_x2: {
6773 Node->getValueType(0),
6774 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6775 AArch64::PEXT_2PCI_D}))
6776 SelectPExtPair(Node, Op);
6777 return;
6778 }
6779 }
6780 break;
6781 }
6782 case ISD::INTRINSIC_VOID: {
6783 unsigned IntNo = Node->getConstantOperandVal(1);
6784 if (Node->getNumOperands() >= 3)
6785 VT = Node->getOperand(2)->getValueType(0);
6786 switch (IntNo) {
6787 default:
6788 break;
6789 case Intrinsic::aarch64_neon_st1x2: {
6790 if (VT == MVT::v8i8) {
6791 SelectStore(Node, 2, AArch64::ST1Twov8b);
6792 return;
6793 } else if (VT == MVT::v16i8) {
6794 SelectStore(Node, 2, AArch64::ST1Twov16b);
6795 return;
6796 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6797 VT == MVT::v4bf16) {
6798 SelectStore(Node, 2, AArch64::ST1Twov4h);
6799 return;
6800 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6801 VT == MVT::v8bf16) {
6802 SelectStore(Node, 2, AArch64::ST1Twov8h);
6803 return;
6804 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6805 SelectStore(Node, 2, AArch64::ST1Twov2s);
6806 return;
6807 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6808 SelectStore(Node, 2, AArch64::ST1Twov4s);
6809 return;
6810 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6811 SelectStore(Node, 2, AArch64::ST1Twov2d);
6812 return;
6813 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6814 SelectStore(Node, 2, AArch64::ST1Twov1d);
6815 return;
6816 }
6817 break;
6818 }
6819 case Intrinsic::aarch64_neon_st1x3: {
6820 if (VT == MVT::v8i8) {
6821 SelectStore(Node, 3, AArch64::ST1Threev8b);
6822 return;
6823 } else if (VT == MVT::v16i8) {
6824 SelectStore(Node, 3, AArch64::ST1Threev16b);
6825 return;
6826 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6827 VT == MVT::v4bf16) {
6828 SelectStore(Node, 3, AArch64::ST1Threev4h);
6829 return;
6830 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6831 VT == MVT::v8bf16) {
6832 SelectStore(Node, 3, AArch64::ST1Threev8h);
6833 return;
6834 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6835 SelectStore(Node, 3, AArch64::ST1Threev2s);
6836 return;
6837 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6838 SelectStore(Node, 3, AArch64::ST1Threev4s);
6839 return;
6840 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6841 SelectStore(Node, 3, AArch64::ST1Threev2d);
6842 return;
6843 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6844 SelectStore(Node, 3, AArch64::ST1Threev1d);
6845 return;
6846 }
6847 break;
6848 }
6849 case Intrinsic::aarch64_neon_st1x4: {
6850 if (VT == MVT::v8i8) {
6851 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6852 return;
6853 } else if (VT == MVT::v16i8) {
6854 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6855 return;
6856 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6857 VT == MVT::v4bf16) {
6858 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6859 return;
6860 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6861 VT == MVT::v8bf16) {
6862 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6863 return;
6864 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6865 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6866 return;
6867 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6868 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6869 return;
6870 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6871 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6872 return;
6873 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6874 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6875 return;
6876 }
6877 break;
6878 }
6879 case Intrinsic::aarch64_neon_st2: {
6880 if (VT == MVT::v8i8) {
6881 SelectStore(Node, 2, AArch64::ST2Twov8b);
6882 return;
6883 } else if (VT == MVT::v16i8) {
6884 SelectStore(Node, 2, AArch64::ST2Twov16b);
6885 return;
6886 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6887 VT == MVT::v4bf16) {
6888 SelectStore(Node, 2, AArch64::ST2Twov4h);
6889 return;
6890 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6891 VT == MVT::v8bf16) {
6892 SelectStore(Node, 2, AArch64::ST2Twov8h);
6893 return;
6894 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6895 SelectStore(Node, 2, AArch64::ST2Twov2s);
6896 return;
6897 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6898 SelectStore(Node, 2, AArch64::ST2Twov4s);
6899 return;
6900 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6901 SelectStore(Node, 2, AArch64::ST2Twov2d);
6902 return;
6903 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6904 SelectStore(Node, 2, AArch64::ST1Twov1d);
6905 return;
6906 }
6907 break;
6908 }
6909 case Intrinsic::aarch64_neon_st3: {
6910 if (VT == MVT::v8i8) {
6911 SelectStore(Node, 3, AArch64::ST3Threev8b);
6912 return;
6913 } else if (VT == MVT::v16i8) {
6914 SelectStore(Node, 3, AArch64::ST3Threev16b);
6915 return;
6916 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6917 VT == MVT::v4bf16) {
6918 SelectStore(Node, 3, AArch64::ST3Threev4h);
6919 return;
6920 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6921 VT == MVT::v8bf16) {
6922 SelectStore(Node, 3, AArch64::ST3Threev8h);
6923 return;
6924 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6925 SelectStore(Node, 3, AArch64::ST3Threev2s);
6926 return;
6927 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6928 SelectStore(Node, 3, AArch64::ST3Threev4s);
6929 return;
6930 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6931 SelectStore(Node, 3, AArch64::ST3Threev2d);
6932 return;
6933 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6934 SelectStore(Node, 3, AArch64::ST1Threev1d);
6935 return;
6936 }
6937 break;
6938 }
6939 case Intrinsic::aarch64_neon_st4: {
6940 if (VT == MVT::v8i8) {
6941 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6942 return;
6943 } else if (VT == MVT::v16i8) {
6944 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6945 return;
6946 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6947 VT == MVT::v4bf16) {
6948 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6949 return;
6950 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6951 VT == MVT::v8bf16) {
6952 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6953 return;
6954 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6955 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6956 return;
6957 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6958 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6959 return;
6960 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6961 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6962 return;
6963 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6964 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6965 return;
6966 }
6967 break;
6968 }
6969 case Intrinsic::aarch64_neon_st2lane: {
6970 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6971 SelectStoreLane(Node, 2, AArch64::ST2i8);
6972 return;
6973 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6974 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6975 SelectStoreLane(Node, 2, AArch64::ST2i16);
6976 return;
6977 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6978 VT == MVT::v2f32) {
6979 SelectStoreLane(Node, 2, AArch64::ST2i32);
6980 return;
6981 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6982 VT == MVT::v1f64) {
6983 SelectStoreLane(Node, 2, AArch64::ST2i64);
6984 return;
6985 }
6986 break;
6987 }
6988 case Intrinsic::aarch64_neon_st3lane: {
6989 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6990 SelectStoreLane(Node, 3, AArch64::ST3i8);
6991 return;
6992 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6993 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6994 SelectStoreLane(Node, 3, AArch64::ST3i16);
6995 return;
6996 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6997 VT == MVT::v2f32) {
6998 SelectStoreLane(Node, 3, AArch64::ST3i32);
6999 return;
7000 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7001 VT == MVT::v1f64) {
7002 SelectStoreLane(Node, 3, AArch64::ST3i64);
7003 return;
7004 }
7005 break;
7006 }
7007 case Intrinsic::aarch64_neon_st4lane: {
7008 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7009 SelectStoreLane(Node, 4, AArch64::ST4i8);
7010 return;
7011 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7012 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7013 SelectStoreLane(Node, 4, AArch64::ST4i16);
7014 return;
7015 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7016 VT == MVT::v2f32) {
7017 SelectStoreLane(Node, 4, AArch64::ST4i32);
7018 return;
7019 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7020 VT == MVT::v1f64) {
7021 SelectStoreLane(Node, 4, AArch64::ST4i64);
7022 return;
7023 }
7024 break;
7025 }
7026 case Intrinsic::aarch64_sve_st2q: {
7027 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
7028 return;
7029 }
7030 case Intrinsic::aarch64_sve_st3q: {
7031 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
7032 return;
7033 }
7034 case Intrinsic::aarch64_sve_st4q: {
7035 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
7036 return;
7037 }
7038 case Intrinsic::aarch64_sve_st2: {
7039 if (VT == MVT::nxv16i8) {
7040 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
7041 return;
7042 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7043 VT == MVT::nxv8bf16) {
7044 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
7045 return;
7046 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7047 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
7048 return;
7049 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7050 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
7051 return;
7052 }
7053 break;
7054 }
7055 case Intrinsic::aarch64_sve_st3: {
7056 if (VT == MVT::nxv16i8) {
7057 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
7058 return;
7059 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7060 VT == MVT::nxv8bf16) {
7061 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
7062 return;
7063 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7064 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
7065 return;
7066 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7067 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
7068 return;
7069 }
7070 break;
7071 }
7072 case Intrinsic::aarch64_sve_st4: {
7073 if (VT == MVT::nxv16i8) {
7074 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7075 return;
7076 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7077 VT == MVT::nxv8bf16) {
7078 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7079 return;
7080 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7081 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7082 return;
7083 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7084 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7085 return;
7086 }
7087 break;
7088 }
7089 }
7090 break;
7091 }
7092 case AArch64ISD::LD2post: {
7093 if (VT == MVT::v8i8) {
7094 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7095 return;
7096 } else if (VT == MVT::v16i8) {
7097 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7098 return;
7099 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7100 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7101 return;
7102 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7103 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7104 return;
7105 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7106 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7107 return;
7108 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7109 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7110 return;
7111 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7112 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7113 return;
7114 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7115 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7116 return;
7117 }
7118 break;
7119 }
7120 case AArch64ISD::LD3post: {
7121 if (VT == MVT::v8i8) {
7122 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7123 return;
7124 } else if (VT == MVT::v16i8) {
7125 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7126 return;
7127 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7128 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7129 return;
7130 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7131 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7132 return;
7133 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7134 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7135 return;
7136 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7137 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7138 return;
7139 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7140 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7141 return;
7142 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7143 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7144 return;
7145 }
7146 break;
7147 }
7148 case AArch64ISD::LD4post: {
7149 if (VT == MVT::v8i8) {
7150 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7151 return;
7152 } else if (VT == MVT::v16i8) {
7153 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7154 return;
7155 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7156 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7157 return;
7158 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7159 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7160 return;
7161 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7162 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7163 return;
7164 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7165 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7166 return;
7167 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7168 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7169 return;
7170 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7171 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7172 return;
7173 }
7174 break;
7175 }
7176 case AArch64ISD::LD1x2post: {
7177 if (VT == MVT::v8i8) {
7178 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7179 return;
7180 } else if (VT == MVT::v16i8) {
7181 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7182 return;
7183 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7184 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7185 return;
7186 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7187 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7188 return;
7189 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7190 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7191 return;
7192 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7193 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7194 return;
7195 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7196 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7197 return;
7198 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7199 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7200 return;
7201 }
7202 break;
7203 }
7204 case AArch64ISD::LD1x3post: {
7205 if (VT == MVT::v8i8) {
7206 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7207 return;
7208 } else if (VT == MVT::v16i8) {
7209 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7210 return;
7211 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7212 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7213 return;
7214 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7215 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7216 return;
7217 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7218 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7219 return;
7220 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7221 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7222 return;
7223 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7224 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7225 return;
7226 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7227 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7228 return;
7229 }
7230 break;
7231 }
7232 case AArch64ISD::LD1x4post: {
7233 if (VT == MVT::v8i8) {
7234 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7235 return;
7236 } else if (VT == MVT::v16i8) {
7237 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7238 return;
7239 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7240 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7241 return;
7242 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7243 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7244 return;
7245 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7246 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7247 return;
7248 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7249 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7250 return;
7251 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7252 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7253 return;
7254 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7255 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7256 return;
7257 }
7258 break;
7259 }
7260 case AArch64ISD::LD1DUPpost: {
7261 if (VT == MVT::v8i8) {
7262 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7263 return;
7264 } else if (VT == MVT::v16i8) {
7265 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7266 return;
7267 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7268 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7269 return;
7270 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7271 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7272 return;
7273 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7274 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7275 return;
7276 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7277 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7278 return;
7279 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7280 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7281 return;
7282 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7283 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7284 return;
7285 }
7286 break;
7287 }
7288 case AArch64ISD::LD2DUPpost: {
7289 if (VT == MVT::v8i8) {
7290 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7291 return;
7292 } else if (VT == MVT::v16i8) {
7293 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7294 return;
7295 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7296 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7297 return;
7298 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7299 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7300 return;
7301 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7302 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7303 return;
7304 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7305 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7306 return;
7307 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7308 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7309 return;
7310 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7311 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7312 return;
7313 }
7314 break;
7315 }
7316 case AArch64ISD::LD3DUPpost: {
7317 if (VT == MVT::v8i8) {
7318 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7319 return;
7320 } else if (VT == MVT::v16i8) {
7321 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7322 return;
7323 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7324 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7325 return;
7326 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7327 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7328 return;
7329 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7330 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7331 return;
7332 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7333 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7334 return;
7335 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7336 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7337 return;
7338 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7339 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7340 return;
7341 }
7342 break;
7343 }
7344 case AArch64ISD::LD4DUPpost: {
7345 if (VT == MVT::v8i8) {
7346 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7347 return;
7348 } else if (VT == MVT::v16i8) {
7349 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7350 return;
7351 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7352 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7353 return;
7354 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7355 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7356 return;
7357 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7358 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7359 return;
7360 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7361 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7362 return;
7363 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7364 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7365 return;
7366 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7367 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7368 return;
7369 }
7370 break;
7371 }
7372 case AArch64ISD::LD1LANEpost: {
7373 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7374 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7375 return;
7376 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7377 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7378 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7379 return;
7380 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7381 VT == MVT::v2f32) {
7382 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7383 return;
7384 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7385 VT == MVT::v1f64) {
7386 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7387 return;
7388 }
7389 break;
7390 }
7391 case AArch64ISD::LD2LANEpost: {
7392 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7393 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7394 return;
7395 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7396 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7397 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7398 return;
7399 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7400 VT == MVT::v2f32) {
7401 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7402 return;
7403 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7404 VT == MVT::v1f64) {
7405 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7406 return;
7407 }
7408 break;
7409 }
7410 case AArch64ISD::LD3LANEpost: {
7411 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7412 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7413 return;
7414 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7415 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7416 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7417 return;
7418 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7419 VT == MVT::v2f32) {
7420 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7421 return;
7422 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7423 VT == MVT::v1f64) {
7424 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7425 return;
7426 }
7427 break;
7428 }
7429 case AArch64ISD::LD4LANEpost: {
7430 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7431 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7432 return;
7433 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7434 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7435 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7436 return;
7437 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7438 VT == MVT::v2f32) {
7439 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7440 return;
7441 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7442 VT == MVT::v1f64) {
7443 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7444 return;
7445 }
7446 break;
7447 }
7448 case AArch64ISD::ST2post: {
7449 VT = Node->getOperand(1).getValueType();
7450 if (VT == MVT::v8i8) {
7451 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7452 return;
7453 } else if (VT == MVT::v16i8) {
7454 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7455 return;
7456 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7457 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7458 return;
7459 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7460 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7461 return;
7462 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7463 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7464 return;
7465 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7466 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7467 return;
7468 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7469 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7470 return;
7471 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7472 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7473 return;
7474 }
7475 break;
7476 }
7477 case AArch64ISD::ST3post: {
7478 VT = Node->getOperand(1).getValueType();
7479 if (VT == MVT::v8i8) {
7480 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7481 return;
7482 } else if (VT == MVT::v16i8) {
7483 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7484 return;
7485 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7486 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7487 return;
7488 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7489 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7490 return;
7491 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7492 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7493 return;
7494 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7495 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7496 return;
7497 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7498 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7499 return;
7500 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7501 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7502 return;
7503 }
7504 break;
7505 }
7506 case AArch64ISD::ST4post: {
7507 VT = Node->getOperand(1).getValueType();
7508 if (VT == MVT::v8i8) {
7509 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7510 return;
7511 } else if (VT == MVT::v16i8) {
7512 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7513 return;
7514 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7515 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7516 return;
7517 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7518 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7519 return;
7520 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7521 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7522 return;
7523 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7524 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7525 return;
7526 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7527 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7528 return;
7529 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7530 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7531 return;
7532 }
7533 break;
7534 }
7535 case AArch64ISD::ST1x2post: {
7536 VT = Node->getOperand(1).getValueType();
7537 if (VT == MVT::v8i8) {
7538 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7539 return;
7540 } else if (VT == MVT::v16i8) {
7541 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7542 return;
7543 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7544 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7545 return;
7546 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7547 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7548 return;
7549 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7550 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7551 return;
7552 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7553 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7554 return;
7555 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7556 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7557 return;
7558 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7559 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7560 return;
7561 }
7562 break;
7563 }
7564 case AArch64ISD::ST1x3post: {
7565 VT = Node->getOperand(1).getValueType();
7566 if (VT == MVT::v8i8) {
7567 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7568 return;
7569 } else if (VT == MVT::v16i8) {
7570 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7571 return;
7572 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7573 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7574 return;
7575 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7576 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7577 return;
7578 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7579 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7580 return;
7581 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7582 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7583 return;
7584 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7585 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7586 return;
7587 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7588 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7589 return;
7590 }
7591 break;
7592 }
7593 case AArch64ISD::ST1x4post: {
7594 VT = Node->getOperand(1).getValueType();
7595 if (VT == MVT::v8i8) {
7596 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7597 return;
7598 } else if (VT == MVT::v16i8) {
7599 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7600 return;
7601 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7602 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7603 return;
7604 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7605 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7606 return;
7607 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7608 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7609 return;
7610 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7611 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7612 return;
7613 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7614 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7615 return;
7616 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7617 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7618 return;
7619 }
7620 break;
7621 }
7622 case AArch64ISD::ST2LANEpost: {
7623 VT = Node->getOperand(1).getValueType();
7624 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7625 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7626 return;
7627 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7628 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7629 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7630 return;
7631 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7632 VT == MVT::v2f32) {
7633 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7634 return;
7635 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7636 VT == MVT::v1f64) {
7637 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7638 return;
7639 }
7640 break;
7641 }
7642 case AArch64ISD::ST3LANEpost: {
7643 VT = Node->getOperand(1).getValueType();
7644 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7645 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7646 return;
7647 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7648 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7649 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7650 return;
7651 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7652 VT == MVT::v2f32) {
7653 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7654 return;
7655 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7656 VT == MVT::v1f64) {
7657 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7658 return;
7659 }
7660 break;
7661 }
7662 case AArch64ISD::ST4LANEpost: {
7663 VT = Node->getOperand(1).getValueType();
7664 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7665 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7666 return;
7667 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7668 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7669 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7670 return;
7671 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7672 VT == MVT::v2f32) {
7673 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7674 return;
7675 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7676 VT == MVT::v1f64) {
7677 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7678 return;
7679 }
7680 break;
7681 }
7682 }
7683
7684 // Select the default instruction
7685 SelectCode(Node);
7686}
7687
7688 /// createAArch64ISelDag - This pass converts a legalized DAG into an
7689 /// AArch64-specific DAG, ready for instruction scheduling.
///
/// Returns a newly allocated AArch64DAGToDAGISelLegacy constructed from
/// \p TM and \p OptLevel.
7691 CodeGenOptLevel OptLevel) {
7692 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7693 }
7694
7695 /// When \p PredVT is a scalable vector predicate in the form
7696 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7697 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7698 /// structured vectors (NumVec > 1), the output data type is
7699 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7700 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7701 /// EVT.
///
/// Only the predicate types nxv16i1, nxv8i1, nxv4i1 and nxv2i1 are accepted;
/// any other type yields an invalid EVT. \p NumVec must be in the range
/// [1, 4] (checked by assertion).
7703 unsigned NumVec) {
7704 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7705 if (!PredVT.isScalableVectorOf(MVT::i1))
7706 return EVT();
7707
// Reject scalable i1 vectors other than the four supported predicate widths.
7708 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7709 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7710 return EVT();
7711
7712 ElementCount EC = PredVT.getVectorElementCount();
// The element width is the SVE block size divided by the predicate's minimum
// element count.
7713 EVT ScalarVT =
7714 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
// Multiply the element count by NumVec to cover multi-vector accesses.
7715 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7716
7717 return MemVT;
7718 }
7719
7720 /// Builds an integer vector type large enough to hold \p NumVec instances
7721 /// of \p VecVT.
///
/// The element count used for the result is the element count of \p VecVT
/// multiplied by \p NumVec.
7722 static EVT getMultipleVectorType(LLVMContext &Ctx, EVT VecVT, unsigned NumVec) {
7724 VecVT.getVectorElementCount() * NumVec);
7725 }
7726
7727 /// Return the EVT of the data associated with the memory operation in \p
7728 /// Root. If such an EVT cannot be retrieved, an invalid EVT is returned.
///
/// The node kinds are examined in this order:
///  1. MemIntrinsicSDNode: its memory VT is returned directly.
///  2. Other MemSDNode (plain or masked load/store): the value type of the
///     loaded/stored data with its element type replaced by the element type
///     of the memory VT.
///  3. Selected AArch64ISD load/store nodes: the VT carried by a VTSDNode
///     operand.
///  4. INTRINSIC_VOID / INTRINSIC_W_CHAIN: a per-intrinsic type, keyed on the
///     intrinsic ID held in operand 1.
7730 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7731 return MemIntr->getMemoryVT();
7732
7733 if (isa<MemSDNode>(Root)) {
7734 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7735
// Pick up the type of the value that is loaded or stored.
7736 EVT DataVT;
7737 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7738 DataVT = Load->getValueType(0);
7739 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7740 DataVT = Load->getValueType(0);
7741 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7742 DataVT = Store->getValue().getValueType();
7743 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7744 DataVT = Store->getValue().getValueType();
7745 else
7746 llvm_unreachable("Unexpected MemSDNode!");
7747
// Keep the shape of the value but use the element type of the memory VT.
7748 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7749 }
7750
7751 const unsigned Opcode = Root->getOpcode();
7752 // For custom ISD nodes, we have to look at them individually to extract the
7753 // type of the data moved to/from memory.
7754 switch (Opcode) {
// These loads carry the memory type as a VTSDNode in operand 3.
7755 case AArch64ISD::LD1_MERGE_ZERO:
7756 case AArch64ISD::LD1S_MERGE_ZERO:
7757 case AArch64ISD::LDNF1_MERGE_ZERO:
7758 case AArch64ISD::LDNF1S_MERGE_ZERO:
7759 return cast<VTSDNode>(Root->getOperand(3))->getVT();
// The predicated store carries the memory type as a VTSDNode in operand 4.
7760 case AArch64ISD::ST1_PRED:
7761 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7762 default:
7763 break;
7764 }
7765
// Anything that is not an intrinsic node at this point has no known memory
// type.
7766 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7767 return EVT();
7768
// Operand 1 holds the intrinsic ID.
7769 switch (Root->getConstantOperandVal(1)) {
7770 default:
7771 return EVT();
7772 case Intrinsic::aarch64_sme_ldr:
7773 case Intrinsic::aarch64_sme_str:
7774 return MVT::nxv16i8;
7775 case Intrinsic::aarch64_sve_prf:
7776 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7777 // width of the predicate.
7779 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7780 case Intrinsic::aarch64_sve_ld2_sret:
7781 case Intrinsic::aarch64_sve_ld2q_sret:
7783 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7784 case Intrinsic::aarch64_sve_st2q:
7786 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7787 case Intrinsic::aarch64_sve_ld3_sret:
7788 case Intrinsic::aarch64_sve_ld3q_sret:
7790 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7791 case Intrinsic::aarch64_sve_st3q:
7793 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7794 case Intrinsic::aarch64_sve_ld4_sret:
7795 case Intrinsic::aarch64_sve_ld4q_sret:
7797 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7798 case Intrinsic::aarch64_sve_st4q:
7800 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
// Multi-vector loads: derive the type from the node's first result type.
7801 case Intrinsic::aarch64_sve_ld1_pn_x2:
7802 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
7803 return getMultipleVectorType(Ctx, Root->getValueType(0),
7804 /*NumVec=*/2);
7805 case Intrinsic::aarch64_sve_ld1_pn_x4:
7806 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
7807 return getMultipleVectorType(Ctx, Root->getValueType(0),
7808 /*NumVec=*/4);
// Multi-vector stores: derive the type from the value type of operand 2.
7809 case Intrinsic::aarch64_sve_st1_pn_x2:
7810 case Intrinsic::aarch64_sve_stnt1_pn_x2:
7811 return getMultipleVectorType(Ctx, Root->getOperand(2).getValueType(),
7812 /*NumVec=*/2);
7813 case Intrinsic::aarch64_sve_st1_pn_x4:
7814 case Intrinsic::aarch64_sve_stnt1_pn_x4:
7815 return getMultipleVectorType(Ctx, Root->getOperand(2).getValueType(),
7816 /*NumVec=*/4);
7817 case Intrinsic::aarch64_sve_ld1udq:
7818 case Intrinsic::aarch64_sve_st1dq:
7819 return EVT(MVT::nxv1i64);
7820 case Intrinsic::aarch64_sve_ld1uwq:
7821 case Intrinsic::aarch64_sve_st1wq:
7822 return EVT(MVT::nxv1i32);
7823 }
7824 }
7825
7826 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7827 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7828 /// where Root is the memory access using N for its address.
///
/// On success \p Base and \p OffImm are set and true is returned. A bare
/// frame index is matched with a zero offset only when it refers to a
/// scalable stack object; otherwise \p N must be an ADD whose second operand
/// is either a VSCALE node or a constant.
7829 template <int64_t Min, int64_t Max>
7830 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7831 SDValue &Base,
7832 SDValue &OffImm) {
7833 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7834 const DataLayout &DL = CurDAG->getDataLayout();
7835 const MachineFrameInfo &MFI = MF->getFrameInfo();
7836
7837 if (N.getOpcode() == ISD::FrameIndex) {
7838 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7839 // We can only encode VL scaled offsets, so only fold in frame indexes
7840 // referencing SVE objects.
7841 if (MFI.hasScalableStackID(FI)) {
7842 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7843 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7844 return true;
7845 }
7846
7847 return false;
7848 }
7849
// The offset is expressed in units of MemVT, so an unknown memory type cannot
// be handled.
7850 if (MemVT == EVT())
7851 return false;
7852
7853 if (N.getOpcode() != ISD::ADD)
7854 return false;
7855
// Determine MulImm, the byte multiplier applied to vscale, from the second
// ADD operand.
7856 SDValue VScale = N.getOperand(1);
7857 int64_t MulImm = std::numeric_limits<int64_t>::max();
7858 if (VScale.getOpcode() == ISD::VSCALE) {
7859 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7860 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7861 int64_t ByteOffset = C->getSExtValue();
7862 const auto KnownVScale =
7864
// A plain constant is only usable when KnownVScale is non-zero and divides
// the byte offset exactly.
7865 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7866 return false;
7867
7868 MulImm = ByteOffset / KnownVScale;
7869 } else
7870 return false;
7871
// Known-minimum size of the memory type, in bytes.
7872 TypeSize TS = MemVT.getSizeInBits();
7873 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7874
// The multiplier must be a whole number of MemVT-sized units.
7875 if ((MulImm % MemWidthBytes) != 0)
7876 return false;
7877
7878 int64_t Offset = MulImm / MemWidthBytes;
7880 return false;
7881
7882 Base = N.getOperand(0);
7883 if (Base.getOpcode() == ISD::FrameIndex) {
7884 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7885 // We can only encode VL scaled offsets, so only fold in frame indexes
7886 // referencing SVE objects.
7887 if (MFI.hasScalableStackID(FI))
7888 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7889 }
7890
7891 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7892 return true;
7893 }
7894
7895/// Select register plus register addressing mode for SVE, with scaled
7896/// offset.
7897bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7898 SDValue &Base,
7899 SDValue &Offset) {
7900 if (N.getOpcode() != ISD::ADD)
7901 return false;
7902
7903 // Process an ADD node.
7904 const SDValue LHS = N.getOperand(0);
7905 const SDValue RHS = N.getOperand(1);
7906
7907 // 8 bit data does not come with the SHL node, so it is treated
7908 // separately.
7909 if (Scale == 0) {
7910 Base = LHS;
7911 Offset = RHS;
7912 return true;
7913 }
7914
7915 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7916 int64_t ImmOff = C->getSExtValue();
7917 unsigned Size = 1 << Scale;
7918
7919 // To use the reg+reg addressing mode, the immediate must be a multiple of
7920 // the vector element's byte size.
7921 if (ImmOff % Size)
7922 return false;
7923
7924 SDLoc DL(N);
7925 Base = LHS;
7926 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7927 SDValue Ops[] = {Offset};
7928 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7929 Offset = SDValue(MI, 0);
7930 return true;
7931 }
7932
7933 // Check if the RHS is a shift node with a constant.
7934 if (RHS.getOpcode() != ISD::SHL)
7935 return false;
7936
7937 const SDValue ShiftRHS = RHS.getOperand(1);
7938 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7939 if (C->getZExtValue() == Scale) {
7940 Base = LHS;
7941 Offset = RHS.getOperand(0);
7942 return true;
7943 }
7944
7945 return false;
7946}
7947
7948bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7949 const AArch64TargetLowering *TLI =
7950 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7951
7952 return TLI->isAllActivePredicate(*CurDAG, N);
7953}
7954
7955bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7956 return N.getValueType().isScalableVectorOf(MVT::i1);
7957}
7958
/// Split \p N into a base plus a scaled immediate offset.
///
/// A constant is folded into the offset only when it is strictly positive,
/// no larger than \p MaxSize and a multiple of \p Scale; the emitted offset
/// is the constant divided by \p Scale. Three forms are tried in order:
///  - \p N is such a constant: the base is the i32 constant 0;
///  - \p N is a base with such a constant offset: the base is operand 0;
///  - anything else: \p N itself is the base and the offset is 0.
/// Every path returns true.
7959 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7961 unsigned Scale) {
// Yields the scaled target constant for an acceptable immediate, or an empty
// SDValue otherwise.
7962 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7963 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7964 int64_t ImmOff = C->getSExtValue();
7965 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7966 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7967 }
7968 return SDValue();
7969 };
7970
7971 if (SDValue C = MatchConstantOffset(N)) {
7972 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7973 Offset = C;
7974 return true;
7975 }
7976
7977 // Try to untangle an ADD node into a 'reg + offset'
7978 if (CurDAG->isBaseWithConstantOffset(N)) {
7979 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7980 Base = N.getOperand(0);
7981 Offset = C;
7982 return true;
7983 }
7984 }
7985
7986 // By default, just match reg + 0.
7987 Base = N;
7988 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7989 return true;
7990 }
7991
/// Match \p N as a constant immediate for a compare-and-branch node \p P.
///
/// The accepted range depends on the condition code read from operand 1 of
/// \p P: [1, 64) for GE/HS/LT/LO, [0, 63) for LE/LS/GT/HI and [0, 64) for
/// every other condition. On success \p Imm is set to a target constant with
/// the value and type of \p N and true is returned; otherwise false.
7992 bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7993 SDValue &Imm) {
7995 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7996 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7997 // Check conservatively if the immediate fits the valid range [0, 64).
7998 // Immediate variants for GE and HS definitely need to be decremented
7999 // when lowering the pseudos later, so an immediate of 1 would become 0.
8000 // For the inverse conditions LT and LO we don't know for sure if they
8001 // will need a decrement but should the decision be made to reverse the
8002 // branch condition, we again end up with the need to decrement.
8003 // The same argument holds for LE, LS, GT and HI and possibly
8004 // incremented immediates. This can lead to slightly less optimal
8005 // codegen, e.g. we never codegen the legal case
8006 // cblt w0, #63, A
8007 // because we could end up with the illegal case
8008 // cbge w0, #64, B
8009 // should the decision to reverse the branch direction be made. For the
8010 // lower bound cases this is no problem since we can express comparisons
8011 // against 0 with either tbz/tbnz or using wzr/xzr.
8012 uint64_t LowerBound = 0, UpperBound = 64;
8013 switch (CC) {
8014 case AArch64CC::GE:
8015 case AArch64CC::HS:
8016 case AArch64CC::LT:
8017 case AArch64CC::LO:
8018 LowerBound = 1;
8019 break;
8020 case AArch64CC::LE:
8021 case AArch64CC::LS:
8022 case AArch64CC::GT:
8023 case AArch64CC::HI:
8024 UpperBound = 63;
8025 break;
8026 default:
8027 break;
8028 }
8029
// Accept the constant only when it lies in [LowerBound, UpperBound).
8030 if (CN->getAPIntValue().uge(LowerBound) &&
8031 CN->getAPIntValue().ult(UpperBound)) {
8032 SDLoc DL(N);
8033 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
8034 return true;
8035 }
8036 }
8037
8038 return false;
8039 }
8040
/// Match \p N as an extended register operand of a compare-and-branch.
///
/// With \p MatchCBB set, byte-sized values are matched (i8 assertions or
/// UXTB/SXTB extends); otherwise halfword-sized values are matched (i16
/// assertions or UXTH/SXTH extends). On success \p Reg is operand 0 of \p N
/// and \p ExtType is an i32 target constant: AArch64_AM::InvalidShiftExtend
/// for AssertZext/AssertSext nodes, or the encoding of the matched extend.
8041 template <bool MatchCBB>
8042 bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
8043 SDValue &ExtType) {
8044
8045 // Use an invalid shift-extend value to indicate we don't need to extend later
8046 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
8047 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
// The asserted type must have the width selected by MatchCBB.
8048 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
8049 return false;
8050 Reg = N.getOperand(0);
8051 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
8052 SDLoc(N), MVT::i32);
8053 return true;
8054 }
8055
8057
// Accept only extends of the width selected by MatchCBB.
8058 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
8059 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
8060 Reg = N.getOperand(0);
8061 ExtType =
8062 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
8063 return true;
8064 }
8065
8066 return false;
8067 }
8068
8069/// Try to fold AArch64 CSEL/FCMP patterns to FMAXNM/FMINNM.
8070///
8071/// This is intentionally done in PreprocessISelDAG rather than DAGCombine:
8072/// doing this earlier based on the defining operation of X can be invalidated
8073/// by later DAG combines. At this point the DAG is being prepared for
8074/// instruction selection, so the use of isKnownNeverSNaN(X) applies to the
8075/// final SDValue being selected.
8076/// Only handles FCMP(X, C) with scalar FP types, where C is a non-NaN constant.
8077/// The nsz requirement is needed only when C is zero, to avoid signed-zero
8078/// mismatches. The never-sNaN check is required because AArch64 FMAXNM/FMINNM
8079/// differ from fcmp+fcsel for signaling NaN inputs.
8080SDValue AArch64DAGToDAGISel::tryFoldCselToFMaxMin(SDNode &N) {
8081 EVT VT = N.getValueType(0);
8082
8083 // Scalar FP only.
8084 if (!VT.isFloatingPoint() || VT.isVector())
8085 return SDValue();
8086
8087 SDValue TVal = N.getOperand(0);
8088 SDValue FVal = N.getOperand(1);
8089 SDValue CCVal = N.getOperand(2);
8090 SDValue Cmp = N.getOperand(3);
8091
8092 if (Cmp.getOpcode() != AArch64ISD::FCMP)
8093 return SDValue();
8094
8095 auto *CC = dyn_cast<ConstantSDNode>(CCVal);
8096 if (!CC)
8097 return SDValue();
8098
8099 SDValue CmpLHS = Cmp.getOperand(0);
8100 SDValue CmpRHS = Cmp.getOperand(1);
8101 unsigned CondCode = CC->getZExtValue();
8102
8103 // Map VT and operation (max/min) to machine opcode.
8104 auto getOpc = [](EVT VT, bool isMax) -> unsigned {
8105 if (VT == MVT::f16)
8106 return isMax ? AArch64::FMAXNMHrr : AArch64::FMINNMHrr;
8107 else if (VT == MVT::f32)
8108 return isMax ? AArch64::FMAXNMSrr : AArch64::FMINNMSrr;
8109 else if (VT == MVT::f64)
8110 return isMax ? AArch64::FMAXNMDrr : AArch64::FMINNMDrr;
8111 else
8112 return 0; // unsupported
8113 };
8114
8115 // Determine whether to use max or min based on condition code and operands.
8116 bool isMax;
8117 if (CondCode == AArch64CC::GT || CondCode == AArch64CC::GE) {
8118 if (TVal == CmpLHS && FVal == CmpRHS)
8119 isMax = true;
8120 else if (TVal == CmpRHS && FVal == CmpLHS)
8121 isMax = false;
8122 else
8123 return SDValue();
8124 } else if (CondCode == AArch64CC::MI || CondCode == AArch64CC::LS) {
8125 if (TVal == CmpLHS && FVal == CmpRHS)
8126 isMax = false;
8127 else if (TVal == CmpRHS && FVal == CmpLHS)
8128 isMax = true;
8129 else
8130 return SDValue();
8131 } else {
8132 return SDValue();
8133 }
8134
8135 // Get the machine opcode for this VT and operation.
8136 unsigned Opc = getOpc(VT, isMax);
8137 if (!Opc)
8138 return SDValue();
8139
8140 // Constant must be non-NaN.
8141 auto *CFP = dyn_cast<ConstantFPSDNode>(CmpRHS);
8142 if (!CFP || CFP->getValueAPF().isNaN())
8143 return SDValue();
8144
8145 // nsz flag required only when constant is zero: fmaxnm(+0,-0)=+0 differs from
8146 // fcmp+select's -0. For non-zero constants, semantics are identical.
8147 if (CFP->isZero() && !N.getFlags().hasNoSignedZeros())
8148 return SDValue();
8149
8150 // Only fold if variable operand is never sNaN.
8151 // This runs after DAG combines, so later combines cannot remove a defining
8152 // operation used by isKnownNeverSNaN().
8153 if (!CurDAG->isKnownNeverSNaN(CmpLHS))
8154 return SDValue();
8155
8156 SDLoc DL(&N);
8157
8158 // Directly emit the machine node
8159 return SDValue(CurDAG->getMachineNode(Opc, DL, VT, CmpLHS, CmpRHS), 0);
8160}
8161
8162void AArch64DAGToDAGISel::PreprocessISelDAG() {
8163 bool MadeChange = false;
8164 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
8165 if (N.use_empty())
8166 continue;
8167
8169 switch (N.getOpcode()) {
8170 case ISD::SCALAR_TO_VECTOR: {
8171 EVT ScalarTy = N.getValueType(0).getVectorElementType();
8172 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
8173 ScalarTy == N.getOperand(0).getValueType())
8174 Result = addBitcastHints(*CurDAG, N);
8175
8176 break;
8177 }
8178 case AArch64ISD::CSEL:
8179 Result = tryFoldCselToFMaxMin(N);
8180 break;
8181 default:
8182 break;
8183 }
8184
8185 if (Result) {
8186 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
8187 LLVM_DEBUG(N.dump(CurDAG));
8188 LLVM_DEBUG(dbgs() << "\nNew: ");
8189 LLVM_DEBUG(Result.dump(CurDAG));
8190 LLVM_DEBUG(dbgs() << "\n");
8191
8192 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8193 MadeChange = true;
8194 }
8195 }
8196
8197 if (MadeChange)
8198 CurDAG->RemoveDeadNodes();
8199
8201}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static std::optional< APInt > GetNEONSplatValue(SDValue N)
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static EVT getMultipleVectorType(LLVMContext &Ctx, EVT VecVT, unsigned NumVec)
Builds an integer vector type large enough to hold NumVec instances of VecVT.
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
Value * RHS
Value * LHS
AArch64DAGToDAGISelPass(AArch64TargetMachine &TM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Register matchRegisterName(StringRef RegName) const
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1475
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:987
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
constexpr bool isLegalArithImmed(const uint64_t C)
isLegalArithImmed -
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
constexpr unsigned getArithImmedShift(const uint64_t C)
getArithImmedShift - assumes C is a legal immediate for arithmetic instructions and
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isScalableVectorOf(EVT EltVT) const
Return true if this is a scalable vector with matching element type.
Definition ValueTypes.h:192
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.